1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * fs/f2fs/data.c
4 *
5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6 * http://www.samsung.com/
7 */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/sched/mm.h>
11 #include <linux/mpage.h>
12 #include <linux/writeback.h>
13 #include <linux/pagevec.h>
14 #include <linux/blkdev.h>
15 #include <linux/bio.h>
16 #include <linux/blk-crypto.h>
17 #include <linux/swap.h>
18 #include <linux/prefetch.h>
19 #include <linux/uio.h>
20 #include <linux/sched/signal.h>
21 #include <linux/fiemap.h>
22 #include <linux/iomap.h>
23
24 #include "f2fs.h"
25 #include "node.h"
26 #include "segment.h"
27 #include "iostat.h"
28 #include <trace/events/f2fs.h>
29
30 #define NUM_PREALLOC_POST_READ_CTXS 128
31
32 static struct kmem_cache *bio_post_read_ctx_cache;
33 static struct kmem_cache *bio_entry_slab;
34 static mempool_t *bio_post_read_ctx_pool;
35 static struct bio_set f2fs_bioset;
36
37 #define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE
38
f2fs_init_bioset(void)39 int __init f2fs_init_bioset(void)
40 {
41 return bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
42 0, BIOSET_NEED_BVECS);
43 }
44
f2fs_destroy_bioset(void)45 void f2fs_destroy_bioset(void)
46 {
47 bioset_exit(&f2fs_bioset);
48 }
49
f2fs_is_cp_guaranteed(struct page * page)50 bool f2fs_is_cp_guaranteed(struct page *page)
51 {
52 struct address_space *mapping = page->mapping;
53 struct inode *inode;
54 struct f2fs_sb_info *sbi;
55
56 if (!mapping)
57 return false;
58
59 inode = mapping->host;
60 sbi = F2FS_I_SB(inode);
61
62 if (inode->i_ino == F2FS_META_INO(sbi) ||
63 inode->i_ino == F2FS_NODE_INO(sbi) ||
64 S_ISDIR(inode->i_mode))
65 return true;
66
67 if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
68 page_private_gcing(page))
69 return true;
70 return false;
71 }
72
__read_io_type(struct page * page)73 static enum count_type __read_io_type(struct page *page)
74 {
75 struct address_space *mapping = page_file_mapping(page);
76
77 if (mapping) {
78 struct inode *inode = mapping->host;
79 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
80
81 if (inode->i_ino == F2FS_META_INO(sbi))
82 return F2FS_RD_META;
83
84 if (inode->i_ino == F2FS_NODE_INO(sbi))
85 return F2FS_RD_NODE;
86 }
87 return F2FS_RD_DATA;
88 }
89
90 /* postprocessing steps for read bios */
91 enum bio_post_read_step {
92 #ifdef CONFIG_FS_ENCRYPTION
93 STEP_DECRYPT = BIT(0),
94 #else
95 STEP_DECRYPT = 0, /* compile out the decryption-related code */
96 #endif
97 #ifdef CONFIG_F2FS_FS_COMPRESSION
98 STEP_DECOMPRESS = BIT(1),
99 #else
100 STEP_DECOMPRESS = 0, /* compile out the decompression-related code */
101 #endif
102 #ifdef CONFIG_FS_VERITY
103 STEP_VERITY = BIT(2),
104 #else
105 STEP_VERITY = 0, /* compile out the verity-related code */
106 #endif
107 };
108
109 struct bio_post_read_ctx {
110 struct bio *bio;
111 struct f2fs_sb_info *sbi;
112 struct work_struct work;
113 unsigned int enabled_steps;
114 /*
115 * decompression_attempted keeps track of whether
116 * f2fs_end_read_compressed_page() has been called on the pages in the
117 * bio that belong to a compressed cluster yet.
118 */
119 bool decompression_attempted;
120 block_t fs_blkaddr;
121 };
122
123 /*
124 * Update and unlock a bio's pages, and free the bio.
125 *
126 * This marks pages up-to-date only if there was no error in the bio (I/O error,
127 * decryption error, or verity error), as indicated by bio->bi_status.
128 *
129 * "Compressed pages" (pagecache pages backed by a compressed cluster on-disk)
130 * aren't marked up-to-date here, as decompression is done on a per-compression-
131 * cluster basis rather than a per-bio basis. Instead, we only must do two
132 * things for each compressed page here: call f2fs_end_read_compressed_page()
133 * with failed=true if an error occurred before it would have normally gotten
134 * called (i.e., I/O error or decryption error, but *not* verity error), and
135 * release the bio's reference to the decompress_io_ctx of the page's cluster.
136 */
f2fs_finish_read_bio(struct bio * bio,bool in_task)137 static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
138 {
139 struct bio_vec *bv;
140 struct bvec_iter_all iter_all;
141 struct bio_post_read_ctx *ctx = bio->bi_private;
142
143 bio_for_each_segment_all(bv, bio, iter_all) {
144 struct page *page = bv->bv_page;
145
146 if (f2fs_is_compressed_page(page)) {
147 if (ctx && !ctx->decompression_attempted)
148 f2fs_end_read_compressed_page(page, true, 0,
149 in_task);
150 f2fs_put_page_dic(page, in_task);
151 continue;
152 }
153
154 if (bio->bi_status)
155 ClearPageUptodate(page);
156 else
157 SetPageUptodate(page);
158 dec_page_count(F2FS_P_SB(page), __read_io_type(page));
159 unlock_page(page);
160 }
161
162 if (ctx)
163 mempool_free(ctx, bio_post_read_ctx_pool);
164 bio_put(bio);
165 }
166
f2fs_verify_bio(struct work_struct * work)167 static void f2fs_verify_bio(struct work_struct *work)
168 {
169 struct bio_post_read_ctx *ctx =
170 container_of(work, struct bio_post_read_ctx, work);
171 struct bio *bio = ctx->bio;
172 bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
173
174 /*
175 * fsverity_verify_bio() may call readahead() again, and while verity
176 * will be disabled for this, decryption and/or decompression may still
177 * be needed, resulting in another bio_post_read_ctx being allocated.
178 * So to prevent deadlocks we need to release the current ctx to the
179 * mempool first. This assumes that verity is the last post-read step.
180 */
181 mempool_free(ctx, bio_post_read_ctx_pool);
182 bio->bi_private = NULL;
183
184 /*
185 * Verify the bio's pages with fs-verity. Exclude compressed pages,
186 * as those were handled separately by f2fs_end_read_compressed_page().
187 */
188 if (may_have_compressed_pages) {
189 struct bio_vec *bv;
190 struct bvec_iter_all iter_all;
191
192 bio_for_each_segment_all(bv, bio, iter_all) {
193 struct page *page = bv->bv_page;
194
195 if (!f2fs_is_compressed_page(page) &&
196 !fsverity_verify_page(page)) {
197 bio->bi_status = BLK_STS_IOERR;
198 break;
199 }
200 }
201 } else {
202 fsverity_verify_bio(bio);
203 }
204
205 f2fs_finish_read_bio(bio, true);
206 }
207
208 /*
209 * If the bio's data needs to be verified with fs-verity, then enqueue the
210 * verity work for the bio. Otherwise finish the bio now.
211 *
212 * Note that to avoid deadlocks, the verity work can't be done on the
213 * decryption/decompression workqueue. This is because verifying the data pages
214 * can involve reading verity metadata pages from the file, and these verity
215 * metadata pages may be encrypted and/or compressed.
216 */
f2fs_verify_and_finish_bio(struct bio * bio,bool in_task)217 static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
218 {
219 struct bio_post_read_ctx *ctx = bio->bi_private;
220
221 if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
222 INIT_WORK(&ctx->work, f2fs_verify_bio);
223 fsverity_enqueue_verify_work(&ctx->work);
224 } else {
225 f2fs_finish_read_bio(bio, in_task);
226 }
227 }
228
229 /*
230 * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
231 * remaining page was read by @ctx->bio.
232 *
233 * Note that a bio may span clusters (even a mix of compressed and uncompressed
234 * clusters) or be for just part of a cluster. STEP_DECOMPRESS just indicates
235 * that the bio includes at least one compressed page. The actual decompression
236 * is done on a per-cluster basis, not a per-bio basis.
237 */
f2fs_handle_step_decompress(struct bio_post_read_ctx * ctx,bool in_task)238 static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
239 bool in_task)
240 {
241 struct bio_vec *bv;
242 struct bvec_iter_all iter_all;
243 bool all_compressed = true;
244 block_t blkaddr = ctx->fs_blkaddr;
245
246 bio_for_each_segment_all(bv, ctx->bio, iter_all) {
247 struct page *page = bv->bv_page;
248
249 if (f2fs_is_compressed_page(page))
250 f2fs_end_read_compressed_page(page, false, blkaddr,
251 in_task);
252 else
253 all_compressed = false;
254
255 blkaddr++;
256 }
257
258 ctx->decompression_attempted = true;
259
260 /*
261 * Optimization: if all the bio's pages are compressed, then scheduling
262 * the per-bio verity work is unnecessary, as verity will be fully
263 * handled at the compression cluster level.
264 */
265 if (all_compressed)
266 ctx->enabled_steps &= ~STEP_VERITY;
267 }
268
f2fs_post_read_work(struct work_struct * work)269 static void f2fs_post_read_work(struct work_struct *work)
270 {
271 struct bio_post_read_ctx *ctx =
272 container_of(work, struct bio_post_read_ctx, work);
273 struct bio *bio = ctx->bio;
274
275 if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) {
276 f2fs_finish_read_bio(bio, true);
277 return;
278 }
279
280 if (ctx->enabled_steps & STEP_DECOMPRESS)
281 f2fs_handle_step_decompress(ctx, true);
282
283 f2fs_verify_and_finish_bio(bio, true);
284 }
285
f2fs_read_end_io(struct bio * bio)286 static void f2fs_read_end_io(struct bio *bio)
287 {
288 struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
289 struct bio_post_read_ctx *ctx;
290 bool intask = in_task();
291
292 iostat_update_and_unbind_ctx(bio);
293 ctx = bio->bi_private;
294
295 if (time_to_inject(sbi, FAULT_READ_IO))
296 bio->bi_status = BLK_STS_IOERR;
297
298 if (bio->bi_status) {
299 f2fs_finish_read_bio(bio, intask);
300 return;
301 }
302
303 if (ctx) {
304 unsigned int enabled_steps = ctx->enabled_steps &
305 (STEP_DECRYPT | STEP_DECOMPRESS);
306
307 /*
308 * If we have only decompression step between decompression and
309 * decrypt, we don't need post processing for this.
310 */
311 if (enabled_steps == STEP_DECOMPRESS &&
312 !f2fs_low_mem_mode(sbi)) {
313 f2fs_handle_step_decompress(ctx, intask);
314 } else if (enabled_steps) {
315 INIT_WORK(&ctx->work, f2fs_post_read_work);
316 queue_work(ctx->sbi->post_read_wq, &ctx->work);
317 return;
318 }
319 }
320
321 f2fs_verify_and_finish_bio(bio, intask);
322 }
323
f2fs_write_end_io(struct bio * bio)324 static void f2fs_write_end_io(struct bio *bio)
325 {
326 struct f2fs_sb_info *sbi;
327 struct bio_vec *bvec;
328 struct bvec_iter_all iter_all;
329
330 iostat_update_and_unbind_ctx(bio);
331 sbi = bio->bi_private;
332
333 if (time_to_inject(sbi, FAULT_WRITE_IO))
334 bio->bi_status = BLK_STS_IOERR;
335
336 bio_for_each_segment_all(bvec, bio, iter_all) {
337 struct page *page = bvec->bv_page;
338 enum count_type type = WB_DATA_TYPE(page, false);
339
340 fscrypt_finalize_bounce_page(&page);
341
342 #ifdef CONFIG_F2FS_FS_COMPRESSION
343 if (f2fs_is_compressed_page(page)) {
344 f2fs_compress_write_end_io(bio, page);
345 continue;
346 }
347 #endif
348
349 if (unlikely(bio->bi_status)) {
350 mapping_set_error(page->mapping, -EIO);
351 if (type == F2FS_WB_CP_DATA)
352 f2fs_stop_checkpoint(sbi, true,
353 STOP_CP_REASON_WRITE_FAIL);
354 }
355
356 f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
357 page_folio(page)->index != nid_of_node(page));
358
359 dec_page_count(sbi, type);
360 if (f2fs_in_warm_node_list(sbi, page))
361 f2fs_del_fsync_node_entry(sbi, page);
362 clear_page_private_gcing(page);
363 end_page_writeback(page);
364 }
365 if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
366 wq_has_sleeper(&sbi->cp_wait))
367 wake_up(&sbi->cp_wait);
368
369 bio_put(bio);
370 }
371
372 #ifdef CONFIG_BLK_DEV_ZONED
f2fs_zone_write_end_io(struct bio * bio)373 static void f2fs_zone_write_end_io(struct bio *bio)
374 {
375 struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
376
377 bio->bi_private = io->bi_private;
378 complete(&io->zone_wait);
379 f2fs_write_end_io(bio);
380 }
381 #endif
382
f2fs_target_device(struct f2fs_sb_info * sbi,block_t blk_addr,sector_t * sector)383 struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
384 block_t blk_addr, sector_t *sector)
385 {
386 struct block_device *bdev = sbi->sb->s_bdev;
387 int i;
388
389 if (f2fs_is_multi_device(sbi)) {
390 for (i = 0; i < sbi->s_ndevs; i++) {
391 if (FDEV(i).start_blk <= blk_addr &&
392 FDEV(i).end_blk >= blk_addr) {
393 blk_addr -= FDEV(i).start_blk;
394 bdev = FDEV(i).bdev;
395 break;
396 }
397 }
398 }
399
400 if (sector)
401 *sector = SECTOR_FROM_BLOCK(blk_addr);
402 return bdev;
403 }
404
f2fs_target_device_index(struct f2fs_sb_info * sbi,block_t blkaddr)405 int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
406 {
407 int i;
408
409 if (!f2fs_is_multi_device(sbi))
410 return 0;
411
412 for (i = 0; i < sbi->s_ndevs; i++)
413 if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
414 return i;
415 return 0;
416 }
417
f2fs_io_flags(struct f2fs_io_info * fio)418 static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
419 {
420 unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0);
421 unsigned int fua_flag, meta_flag, io_flag;
422 blk_opf_t op_flags = 0;
423
424 if (fio->op != REQ_OP_WRITE)
425 return 0;
426 if (fio->type == DATA)
427 io_flag = fio->sbi->data_io_flag;
428 else if (fio->type == NODE)
429 io_flag = fio->sbi->node_io_flag;
430 else
431 return 0;
432
433 fua_flag = io_flag & temp_mask;
434 meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
435
436 /*
437 * data/node io flag bits per temp:
438 * REQ_META | REQ_FUA |
439 * 5 | 4 | 3 | 2 | 1 | 0 |
440 * Cold | Warm | Hot | Cold | Warm | Hot |
441 */
442 if (BIT(fio->temp) & meta_flag)
443 op_flags |= REQ_META;
444 if (BIT(fio->temp) & fua_flag)
445 op_flags |= REQ_FUA;
446 return op_flags;
447 }
448
__bio_alloc(struct f2fs_io_info * fio,int npages)449 static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
450 {
451 struct f2fs_sb_info *sbi = fio->sbi;
452 struct block_device *bdev;
453 sector_t sector;
454 struct bio *bio;
455
456 bdev = f2fs_target_device(sbi, fio->new_blkaddr, §or);
457 bio = bio_alloc_bioset(bdev, npages,
458 fio->op | fio->op_flags | f2fs_io_flags(fio),
459 GFP_NOIO, &f2fs_bioset);
460 bio->bi_iter.bi_sector = sector;
461 if (is_read_io(fio->op)) {
462 bio->bi_end_io = f2fs_read_end_io;
463 bio->bi_private = NULL;
464 } else {
465 bio->bi_end_io = f2fs_write_end_io;
466 bio->bi_private = sbi;
467 bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
468 fio->type, fio->temp);
469 }
470 iostat_alloc_and_bind_ctx(sbi, bio, NULL);
471
472 if (fio->io_wbc)
473 wbc_init_bio(fio->io_wbc, bio);
474
475 return bio;
476 }
477
f2fs_set_bio_crypt_ctx(struct bio * bio,const struct inode * inode,pgoff_t first_idx,const struct f2fs_io_info * fio,gfp_t gfp_mask)478 static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
479 pgoff_t first_idx,
480 const struct f2fs_io_info *fio,
481 gfp_t gfp_mask)
482 {
483 /*
484 * The f2fs garbage collector sets ->encrypted_page when it wants to
485 * read/write raw data without encryption.
486 */
487 if (!fio || !fio->encrypted_page)
488 fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
489 }
490
f2fs_crypt_mergeable_bio(struct bio * bio,const struct inode * inode,pgoff_t next_idx,const struct f2fs_io_info * fio)491 static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
492 pgoff_t next_idx,
493 const struct f2fs_io_info *fio)
494 {
495 /*
496 * The f2fs garbage collector sets ->encrypted_page when it wants to
497 * read/write raw data without encryption.
498 */
499 if (fio && fio->encrypted_page)
500 return !bio_has_crypt_ctx(bio);
501
502 return fscrypt_mergeable_bio(bio, inode, next_idx);
503 }
504
f2fs_submit_read_bio(struct f2fs_sb_info * sbi,struct bio * bio,enum page_type type)505 void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
506 enum page_type type)
507 {
508 WARN_ON_ONCE(!is_read_io(bio_op(bio)));
509 trace_f2fs_submit_read_bio(sbi->sb, type, bio);
510
511 iostat_update_submit_ctx(bio, type);
512 submit_bio(bio);
513 }
514
f2fs_submit_write_bio(struct f2fs_sb_info * sbi,struct bio * bio,enum page_type type)515 static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio,
516 enum page_type type)
517 {
518 WARN_ON_ONCE(is_read_io(bio_op(bio)));
519
520 if (f2fs_lfs_mode(sbi) && current->plug && PAGE_TYPE_ON_MAIN(type))
521 blk_finish_plug(current->plug);
522
523 trace_f2fs_submit_write_bio(sbi->sb, type, bio);
524 iostat_update_submit_ctx(bio, type);
525 submit_bio(bio);
526 }
527
__submit_merged_bio(struct f2fs_bio_info * io)528 static void __submit_merged_bio(struct f2fs_bio_info *io)
529 {
530 struct f2fs_io_info *fio = &io->fio;
531
532 if (!io->bio)
533 return;
534
535 if (is_read_io(fio->op)) {
536 trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
537 f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
538 } else {
539 trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
540 f2fs_submit_write_bio(io->sbi, io->bio, fio->type);
541 }
542 io->bio = NULL;
543 }
544
__has_merged_page(struct bio * bio,struct inode * inode,struct page * page,nid_t ino)545 static bool __has_merged_page(struct bio *bio, struct inode *inode,
546 struct page *page, nid_t ino)
547 {
548 struct bio_vec *bvec;
549 struct bvec_iter_all iter_all;
550
551 if (!bio)
552 return false;
553
554 if (!inode && !page && !ino)
555 return true;
556
557 bio_for_each_segment_all(bvec, bio, iter_all) {
558 struct page *target = bvec->bv_page;
559
560 if (fscrypt_is_bounce_page(target)) {
561 target = fscrypt_pagecache_page(target);
562 if (IS_ERR(target))
563 continue;
564 }
565 if (f2fs_is_compressed_page(target)) {
566 target = f2fs_compress_control_page(target);
567 if (IS_ERR(target))
568 continue;
569 }
570
571 if (inode && inode == target->mapping->host)
572 return true;
573 if (page && page == target)
574 return true;
575 if (ino && ino == ino_of_node(target))
576 return true;
577 }
578
579 return false;
580 }
581
f2fs_init_write_merge_io(struct f2fs_sb_info * sbi)582 int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
583 {
584 int i;
585
586 for (i = 0; i < NR_PAGE_TYPE; i++) {
587 int n = (i == META) ? 1 : NR_TEMP_TYPE;
588 int j;
589
590 sbi->write_io[i] = f2fs_kmalloc(sbi,
591 array_size(n, sizeof(struct f2fs_bio_info)),
592 GFP_KERNEL);
593 if (!sbi->write_io[i])
594 return -ENOMEM;
595
596 for (j = HOT; j < n; j++) {
597 struct f2fs_bio_info *io = &sbi->write_io[i][j];
598
599 init_f2fs_rwsem(&io->io_rwsem);
600 io->sbi = sbi;
601 io->bio = NULL;
602 io->last_block_in_bio = 0;
603 spin_lock_init(&io->io_lock);
604 INIT_LIST_HEAD(&io->io_list);
605 INIT_LIST_HEAD(&io->bio_list);
606 init_f2fs_rwsem(&io->bio_list_lock);
607 #ifdef CONFIG_BLK_DEV_ZONED
608 init_completion(&io->zone_wait);
609 io->zone_pending_bio = NULL;
610 io->bi_private = NULL;
611 #endif
612 }
613 }
614
615 return 0;
616 }
617
__f2fs_submit_merged_write(struct f2fs_sb_info * sbi,enum page_type type,enum temp_type temp)618 static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
619 enum page_type type, enum temp_type temp)
620 {
621 enum page_type btype = PAGE_TYPE_OF_BIO(type);
622 struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
623
624 f2fs_down_write(&io->io_rwsem);
625
626 if (!io->bio)
627 goto unlock_out;
628
629 /* change META to META_FLUSH in the checkpoint procedure */
630 if (type >= META_FLUSH) {
631 io->fio.type = META_FLUSH;
632 io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
633 if (!test_opt(sbi, NOBARRIER))
634 io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
635 }
636 __submit_merged_bio(io);
637 unlock_out:
638 f2fs_up_write(&io->io_rwsem);
639 }
640
__submit_merged_write_cond(struct f2fs_sb_info * sbi,struct inode * inode,struct page * page,nid_t ino,enum page_type type,bool force)641 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
642 struct inode *inode, struct page *page,
643 nid_t ino, enum page_type type, bool force)
644 {
645 enum temp_type temp;
646 bool ret = true;
647
648 for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
649 if (!force) {
650 enum page_type btype = PAGE_TYPE_OF_BIO(type);
651 struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
652
653 f2fs_down_read(&io->io_rwsem);
654 ret = __has_merged_page(io->bio, inode, page, ino);
655 f2fs_up_read(&io->io_rwsem);
656 }
657 if (ret)
658 __f2fs_submit_merged_write(sbi, type, temp);
659
660 /* TODO: use HOT temp only for meta pages now. */
661 if (type >= META)
662 break;
663 }
664 }
665
f2fs_submit_merged_write(struct f2fs_sb_info * sbi,enum page_type type)666 void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
667 {
668 __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
669 }
670
f2fs_submit_merged_write_cond(struct f2fs_sb_info * sbi,struct inode * inode,struct page * page,nid_t ino,enum page_type type)671 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
672 struct inode *inode, struct page *page,
673 nid_t ino, enum page_type type)
674 {
675 __submit_merged_write_cond(sbi, inode, page, ino, type, false);
676 }
677
f2fs_flush_merged_writes(struct f2fs_sb_info * sbi)678 void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
679 {
680 f2fs_submit_merged_write(sbi, DATA);
681 f2fs_submit_merged_write(sbi, NODE);
682 f2fs_submit_merged_write(sbi, META);
683 }
684
685 /*
686 * Fill the locked page with data located in the block address.
687 * A caller needs to unlock the page on failure.
688 */
f2fs_submit_page_bio(struct f2fs_io_info * fio)689 int f2fs_submit_page_bio(struct f2fs_io_info *fio)
690 {
691 struct bio *bio;
692 struct page *page = fio->encrypted_page ?
693 fio->encrypted_page : fio->page;
694
695 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
696 fio->is_por ? META_POR : (__is_meta_io(fio) ?
697 META_GENERIC : DATA_GENERIC_ENHANCE)))
698 return -EFSCORRUPTED;
699
700 trace_f2fs_submit_page_bio(page, fio);
701
702 /* Allocate a new bio */
703 bio = __bio_alloc(fio, 1);
704
705 f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
706 page_folio(fio->page)->index, fio, GFP_NOIO);
707
708 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
709 bio_put(bio);
710 return -EFAULT;
711 }
712
713 if (fio->io_wbc && !is_read_io(fio->op))
714 wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
715
716 inc_page_count(fio->sbi, is_read_io(fio->op) ?
717 __read_io_type(page) : WB_DATA_TYPE(fio->page, false));
718
719 if (is_read_io(bio_op(bio)))
720 f2fs_submit_read_bio(fio->sbi, bio, fio->type);
721 else
722 f2fs_submit_write_bio(fio->sbi, bio, fio->type);
723 return 0;
724 }
725
page_is_mergeable(struct f2fs_sb_info * sbi,struct bio * bio,block_t last_blkaddr,block_t cur_blkaddr)726 static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
727 block_t last_blkaddr, block_t cur_blkaddr)
728 {
729 if (unlikely(sbi->max_io_bytes &&
730 bio->bi_iter.bi_size >= sbi->max_io_bytes))
731 return false;
732 if (last_blkaddr + 1 != cur_blkaddr)
733 return false;
734 return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL);
735 }
736
io_type_is_mergeable(struct f2fs_bio_info * io,struct f2fs_io_info * fio)737 static bool io_type_is_mergeable(struct f2fs_bio_info *io,
738 struct f2fs_io_info *fio)
739 {
740 if (io->fio.op != fio->op)
741 return false;
742 return io->fio.op_flags == fio->op_flags;
743 }
744
io_is_mergeable(struct f2fs_sb_info * sbi,struct bio * bio,struct f2fs_bio_info * io,struct f2fs_io_info * fio,block_t last_blkaddr,block_t cur_blkaddr)745 static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
746 struct f2fs_bio_info *io,
747 struct f2fs_io_info *fio,
748 block_t last_blkaddr,
749 block_t cur_blkaddr)
750 {
751 if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
752 return false;
753 return io_type_is_mergeable(io, fio);
754 }
755
add_bio_entry(struct f2fs_sb_info * sbi,struct bio * bio,struct page * page,enum temp_type temp)756 static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
757 struct page *page, enum temp_type temp)
758 {
759 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
760 struct bio_entry *be;
761
762 be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
763 be->bio = bio;
764 bio_get(bio);
765
766 if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
767 f2fs_bug_on(sbi, 1);
768
769 f2fs_down_write(&io->bio_list_lock);
770 list_add_tail(&be->list, &io->bio_list);
771 f2fs_up_write(&io->bio_list_lock);
772 }
773
del_bio_entry(struct bio_entry * be)774 static void del_bio_entry(struct bio_entry *be)
775 {
776 list_del(&be->list);
777 kmem_cache_free(bio_entry_slab, be);
778 }
779
add_ipu_page(struct f2fs_io_info * fio,struct bio ** bio,struct page * page)780 static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
781 struct page *page)
782 {
783 struct f2fs_sb_info *sbi = fio->sbi;
784 enum temp_type temp;
785 bool found = false;
786 int ret = -EAGAIN;
787
788 for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
789 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
790 struct list_head *head = &io->bio_list;
791 struct bio_entry *be;
792
793 f2fs_down_write(&io->bio_list_lock);
794 list_for_each_entry(be, head, list) {
795 if (be->bio != *bio)
796 continue;
797
798 found = true;
799
800 f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
801 *fio->last_block,
802 fio->new_blkaddr));
803 if (f2fs_crypt_mergeable_bio(*bio,
804 fio->page->mapping->host,
805 page_folio(fio->page)->index, fio) &&
806 bio_add_page(*bio, page, PAGE_SIZE, 0) ==
807 PAGE_SIZE) {
808 ret = 0;
809 break;
810 }
811
812 /* page can't be merged into bio; submit the bio */
813 del_bio_entry(be);
814 f2fs_submit_write_bio(sbi, *bio, DATA);
815 break;
816 }
817 f2fs_up_write(&io->bio_list_lock);
818 }
819
820 if (ret) {
821 bio_put(*bio);
822 *bio = NULL;
823 }
824
825 return ret;
826 }
827
f2fs_submit_merged_ipu_write(struct f2fs_sb_info * sbi,struct bio ** bio,struct page * page)828 void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
829 struct bio **bio, struct page *page)
830 {
831 enum temp_type temp;
832 bool found = false;
833 struct bio *target = bio ? *bio : NULL;
834
835 f2fs_bug_on(sbi, !target && !page);
836
837 for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
838 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
839 struct list_head *head = &io->bio_list;
840 struct bio_entry *be;
841
842 if (list_empty(head))
843 continue;
844
845 f2fs_down_read(&io->bio_list_lock);
846 list_for_each_entry(be, head, list) {
847 if (target)
848 found = (target == be->bio);
849 else
850 found = __has_merged_page(be->bio, NULL,
851 page, 0);
852 if (found)
853 break;
854 }
855 f2fs_up_read(&io->bio_list_lock);
856
857 if (!found)
858 continue;
859
860 found = false;
861
862 f2fs_down_write(&io->bio_list_lock);
863 list_for_each_entry(be, head, list) {
864 if (target)
865 found = (target == be->bio);
866 else
867 found = __has_merged_page(be->bio, NULL,
868 page, 0);
869 if (found) {
870 target = be->bio;
871 del_bio_entry(be);
872 break;
873 }
874 }
875 f2fs_up_write(&io->bio_list_lock);
876 }
877
878 if (found)
879 f2fs_submit_write_bio(sbi, target, DATA);
880 if (bio && *bio) {
881 bio_put(*bio);
882 *bio = NULL;
883 }
884 }
885
f2fs_merge_page_bio(struct f2fs_io_info * fio)886 int f2fs_merge_page_bio(struct f2fs_io_info *fio)
887 {
888 struct bio *bio = *fio->bio;
889 struct page *page = fio->encrypted_page ?
890 fio->encrypted_page : fio->page;
891
892 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
893 __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
894 return -EFSCORRUPTED;
895
896 trace_f2fs_submit_page_bio(page, fio);
897
898 if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
899 fio->new_blkaddr))
900 f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
901 alloc_new:
902 if (!bio) {
903 bio = __bio_alloc(fio, BIO_MAX_VECS);
904 f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
905 page_folio(fio->page)->index, fio, GFP_NOIO);
906
907 add_bio_entry(fio->sbi, bio, page, fio->temp);
908 } else {
909 if (add_ipu_page(fio, &bio, page))
910 goto alloc_new;
911 }
912
913 if (fio->io_wbc)
914 wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
915
916 inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));
917
918 *fio->last_block = fio->new_blkaddr;
919 *fio->bio = bio;
920
921 return 0;
922 }
923
924 #ifdef CONFIG_BLK_DEV_ZONED
is_end_zone_blkaddr(struct f2fs_sb_info * sbi,block_t blkaddr)925 static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
926 {
927 struct block_device *bdev = sbi->sb->s_bdev;
928 int devi = 0;
929
930 if (f2fs_is_multi_device(sbi)) {
931 devi = f2fs_target_device_index(sbi, blkaddr);
932 if (blkaddr < FDEV(devi).start_blk ||
933 blkaddr > FDEV(devi).end_blk) {
934 f2fs_err(sbi, "Invalid block %x", blkaddr);
935 return false;
936 }
937 blkaddr -= FDEV(devi).start_blk;
938 bdev = FDEV(devi).bdev;
939 }
940 return bdev_is_zoned(bdev) &&
941 f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
942 (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
943 }
944 #endif
945
f2fs_submit_page_write(struct f2fs_io_info * fio)946 void f2fs_submit_page_write(struct f2fs_io_info *fio)
947 {
948 struct f2fs_sb_info *sbi = fio->sbi;
949 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
950 struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
951 struct page *bio_page;
952 enum count_type type;
953
954 f2fs_bug_on(sbi, is_read_io(fio->op));
955
956 f2fs_down_write(&io->io_rwsem);
957 next:
958 #ifdef CONFIG_BLK_DEV_ZONED
959 if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
960 wait_for_completion_io(&io->zone_wait);
961 bio_put(io->zone_pending_bio);
962 io->zone_pending_bio = NULL;
963 io->bi_private = NULL;
964 }
965 #endif
966
967 if (fio->in_list) {
968 spin_lock(&io->io_lock);
969 if (list_empty(&io->io_list)) {
970 spin_unlock(&io->io_lock);
971 goto out;
972 }
973 fio = list_first_entry(&io->io_list,
974 struct f2fs_io_info, list);
975 list_del(&fio->list);
976 spin_unlock(&io->io_lock);
977 }
978
979 verify_fio_blkaddr(fio);
980
981 if (fio->encrypted_page)
982 bio_page = fio->encrypted_page;
983 else if (fio->compressed_page)
984 bio_page = fio->compressed_page;
985 else
986 bio_page = fio->page;
987
988 /* set submitted = true as a return value */
989 fio->submitted = 1;
990
991 type = WB_DATA_TYPE(bio_page, fio->compressed_page);
992 inc_page_count(sbi, type);
993
994 if (io->bio &&
995 (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
996 fio->new_blkaddr) ||
997 !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
998 page_folio(bio_page)->index, fio)))
999 __submit_merged_bio(io);
1000 alloc_new:
1001 if (io->bio == NULL) {
1002 io->bio = __bio_alloc(fio, BIO_MAX_VECS);
1003 f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
1004 page_folio(bio_page)->index, fio, GFP_NOIO);
1005 io->fio = *fio;
1006 }
1007
1008 if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
1009 __submit_merged_bio(io);
1010 goto alloc_new;
1011 }
1012
1013 if (fio->io_wbc)
1014 wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
1015
1016 io->last_block_in_bio = fio->new_blkaddr;
1017
1018 trace_f2fs_submit_page_write(fio->page, fio);
1019 #ifdef CONFIG_BLK_DEV_ZONED
1020 if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
1021 is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
1022 bio_get(io->bio);
1023 reinit_completion(&io->zone_wait);
1024 io->bi_private = io->bio->bi_private;
1025 io->bio->bi_private = io;
1026 io->bio->bi_end_io = f2fs_zone_write_end_io;
1027 io->zone_pending_bio = io->bio;
1028 __submit_merged_bio(io);
1029 }
1030 #endif
1031 if (fio->in_list)
1032 goto next;
1033 out:
1034 if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
1035 !f2fs_is_checkpoint_ready(sbi))
1036 __submit_merged_bio(io);
1037 f2fs_up_write(&io->io_rwsem);
1038 }
1039
f2fs_grab_read_bio(struct inode * inode,block_t blkaddr,unsigned nr_pages,blk_opf_t op_flag,pgoff_t first_idx,bool for_write)1040 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
1041 unsigned nr_pages, blk_opf_t op_flag,
1042 pgoff_t first_idx, bool for_write)
1043 {
1044 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1045 struct bio *bio;
1046 struct bio_post_read_ctx *ctx = NULL;
1047 unsigned int post_read_steps = 0;
1048 sector_t sector;
1049 struct block_device *bdev = f2fs_target_device(sbi, blkaddr, §or);
1050
1051 bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
1052 REQ_OP_READ | op_flag,
1053 for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
1054 if (!bio)
1055 return ERR_PTR(-ENOMEM);
1056 bio->bi_iter.bi_sector = sector;
1057 f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
1058 bio->bi_end_io = f2fs_read_end_io;
1059
1060 if (fscrypt_inode_uses_fs_layer_crypto(inode))
1061 post_read_steps |= STEP_DECRYPT;
1062
1063 if (f2fs_need_verity(inode, first_idx))
1064 post_read_steps |= STEP_VERITY;
1065
1066 /*
1067 * STEP_DECOMPRESS is handled specially, since a compressed file might
1068 * contain both compressed and uncompressed clusters. We'll allocate a
1069 * bio_post_read_ctx if the file is compressed, but the caller is
1070 * responsible for enabling STEP_DECOMPRESS if it's actually needed.
1071 */
1072
1073 if (post_read_steps || f2fs_compressed_file(inode)) {
1074 /* Due to the mempool, this never fails. */
1075 ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
1076 ctx->bio = bio;
1077 ctx->sbi = sbi;
1078 ctx->enabled_steps = post_read_steps;
1079 ctx->fs_blkaddr = blkaddr;
1080 ctx->decompression_attempted = false;
1081 bio->bi_private = ctx;
1082 }
1083 iostat_alloc_and_bind_ctx(sbi, bio, ctx);
1084
1085 return bio;
1086 }
1087
1088 /* This can handle encryption stuffs */
f2fs_submit_page_read(struct inode * inode,struct folio * folio,block_t blkaddr,blk_opf_t op_flags,bool for_write)1089 static int f2fs_submit_page_read(struct inode *inode, struct folio *folio,
1090 block_t blkaddr, blk_opf_t op_flags,
1091 bool for_write)
1092 {
1093 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1094 struct bio *bio;
1095
1096 bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
1097 folio->index, for_write);
1098 if (IS_ERR(bio))
1099 return PTR_ERR(bio);
1100
1101 /* wait for GCed page writeback via META_MAPPING */
1102 f2fs_wait_on_block_writeback(inode, blkaddr);
1103
1104 if (!bio_add_folio(bio, folio, PAGE_SIZE, 0)) {
1105 iostat_update_and_unbind_ctx(bio);
1106 if (bio->bi_private)
1107 mempool_free(bio->bi_private, bio_post_read_ctx_pool);
1108 bio_put(bio);
1109 return -EFAULT;
1110 }
1111 inc_page_count(sbi, F2FS_RD_DATA);
1112 f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
1113 f2fs_submit_read_bio(sbi, bio, DATA);
1114 return 0;
1115 }
1116
__set_data_blkaddr(struct dnode_of_data * dn,block_t blkaddr)1117 static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
1118 {
1119 __le32 *addr = get_dnode_addr(dn->inode, dn->node_page);
1120
1121 dn->data_blkaddr = blkaddr;
1122 addr[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
1123 }
1124
1125 /*
1126 * Lock ordering for the change of data block address:
1127 * ->data_page
1128 * ->node_page
1129 * update block addresses in the node page
1130 */
f2fs_set_data_blkaddr(struct dnode_of_data * dn,block_t blkaddr)1131 void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
1132 {
1133 f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1134 __set_data_blkaddr(dn, blkaddr);
1135 if (set_page_dirty(dn->node_page))
1136 dn->node_changed = true;
1137 }
1138
f2fs_update_data_blkaddr(struct dnode_of_data * dn,block_t blkaddr)1139 void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
1140 {
1141 f2fs_set_data_blkaddr(dn, blkaddr);
1142 f2fs_update_read_extent_cache(dn);
1143 }
1144
1145 /* dn->ofs_in_node will be returned with up-to-date last block pointer */
f2fs_reserve_new_blocks(struct dnode_of_data * dn,blkcnt_t count)1146 int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
1147 {
1148 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1149 int err;
1150
1151 if (!count)
1152 return 0;
1153
1154 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1155 return -EPERM;
1156 err = inc_valid_block_count(sbi, dn->inode, &count, true);
1157 if (unlikely(err))
1158 return err;
1159
1160 trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
1161 dn->ofs_in_node, count);
1162
1163 f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1164
1165 for (; count > 0; dn->ofs_in_node++) {
1166 block_t blkaddr = f2fs_data_blkaddr(dn);
1167
1168 if (blkaddr == NULL_ADDR) {
1169 __set_data_blkaddr(dn, NEW_ADDR);
1170 count--;
1171 }
1172 }
1173
1174 if (set_page_dirty(dn->node_page))
1175 dn->node_changed = true;
1176 return 0;
1177 }
1178
1179 /* Should keep dn->ofs_in_node unchanged */
f2fs_reserve_new_block(struct dnode_of_data * dn)1180 int f2fs_reserve_new_block(struct dnode_of_data *dn)
1181 {
1182 unsigned int ofs_in_node = dn->ofs_in_node;
1183 int ret;
1184
1185 ret = f2fs_reserve_new_blocks(dn, 1);
1186 dn->ofs_in_node = ofs_in_node;
1187 return ret;
1188 }
1189
f2fs_reserve_block(struct dnode_of_data * dn,pgoff_t index)1190 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
1191 {
1192 bool need_put = dn->inode_page ? false : true;
1193 int err;
1194
1195 err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
1196 if (err)
1197 return err;
1198
1199 if (dn->data_blkaddr == NULL_ADDR)
1200 err = f2fs_reserve_new_block(dn);
1201 if (err || need_put)
1202 f2fs_put_dnode(dn);
1203 return err;
1204 }
1205
f2fs_get_read_data_page(struct inode * inode,pgoff_t index,blk_opf_t op_flags,bool for_write,pgoff_t * next_pgofs)1206 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
1207 blk_opf_t op_flags, bool for_write,
1208 pgoff_t *next_pgofs)
1209 {
1210 struct address_space *mapping = inode->i_mapping;
1211 struct dnode_of_data dn;
1212 struct page *page;
1213 int err;
1214
1215 page = f2fs_grab_cache_page(mapping, index, for_write);
1216 if (!page)
1217 return ERR_PTR(-ENOMEM);
1218
1219 if (f2fs_lookup_read_extent_cache_block(inode, index,
1220 &dn.data_blkaddr)) {
1221 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
1222 DATA_GENERIC_ENHANCE_READ)) {
1223 err = -EFSCORRUPTED;
1224 goto put_err;
1225 }
1226 goto got_it;
1227 }
1228
1229 set_new_dnode(&dn, inode, NULL, NULL, 0);
1230 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
1231 if (err) {
1232 if (err == -ENOENT && next_pgofs)
1233 *next_pgofs = f2fs_get_next_page_offset(&dn, index);
1234 goto put_err;
1235 }
1236 f2fs_put_dnode(&dn);
1237
1238 if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
1239 err = -ENOENT;
1240 if (next_pgofs)
1241 *next_pgofs = index + 1;
1242 goto put_err;
1243 }
1244 if (dn.data_blkaddr != NEW_ADDR &&
1245 !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
1246 dn.data_blkaddr,
1247 DATA_GENERIC_ENHANCE)) {
1248 err = -EFSCORRUPTED;
1249 goto put_err;
1250 }
1251 got_it:
1252 if (PageUptodate(page)) {
1253 unlock_page(page);
1254 return page;
1255 }
1256
1257 /*
1258 * A new dentry page is allocated but not able to be written, since its
1259 * new inode page couldn't be allocated due to -ENOSPC.
1260 * In such the case, its blkaddr can be remained as NEW_ADDR.
1261 * see, f2fs_add_link -> f2fs_get_new_data_page ->
1262 * f2fs_init_inode_metadata.
1263 */
1264 if (dn.data_blkaddr == NEW_ADDR) {
1265 zero_user_segment(page, 0, PAGE_SIZE);
1266 if (!PageUptodate(page))
1267 SetPageUptodate(page);
1268 unlock_page(page);
1269 return page;
1270 }
1271
1272 err = f2fs_submit_page_read(inode, page_folio(page), dn.data_blkaddr,
1273 op_flags, for_write);
1274 if (err)
1275 goto put_err;
1276 return page;
1277
1278 put_err:
1279 f2fs_put_page(page, 1);
1280 return ERR_PTR(err);
1281 }
1282
f2fs_find_data_page(struct inode * inode,pgoff_t index,pgoff_t * next_pgofs)1283 struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index,
1284 pgoff_t *next_pgofs)
1285 {
1286 struct address_space *mapping = inode->i_mapping;
1287 struct page *page;
1288
1289 page = find_get_page(mapping, index);
1290 if (page && PageUptodate(page))
1291 return page;
1292 f2fs_put_page(page, 0);
1293
1294 page = f2fs_get_read_data_page(inode, index, 0, false, next_pgofs);
1295 if (IS_ERR(page))
1296 return page;
1297
1298 if (PageUptodate(page))
1299 return page;
1300
1301 wait_on_page_locked(page);
1302 if (unlikely(!PageUptodate(page))) {
1303 f2fs_put_page(page, 0);
1304 return ERR_PTR(-EIO);
1305 }
1306 return page;
1307 }
1308
1309 /*
1310 * If it tries to access a hole, return an error.
1311 * Because, the callers, functions in dir.c and GC, should be able to know
1312 * whether this page exists or not.
1313 */
f2fs_get_lock_data_page(struct inode * inode,pgoff_t index,bool for_write)1314 struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
1315 bool for_write)
1316 {
1317 struct address_space *mapping = inode->i_mapping;
1318 struct page *page;
1319
1320 page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL);
1321 if (IS_ERR(page))
1322 return page;
1323
1324 /* wait for read completion */
1325 lock_page(page);
1326 if (unlikely(page->mapping != mapping || !PageUptodate(page))) {
1327 f2fs_put_page(page, 1);
1328 return ERR_PTR(-EIO);
1329 }
1330 return page;
1331 }
1332
1333 /*
1334 * Caller ensures that this data page is never allocated.
1335 * A new zero-filled data page is allocated in the page cache.
1336 *
1337 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
1338 * f2fs_unlock_op().
1339 * Note that, ipage is set only by make_empty_dir, and if any error occur,
1340 * ipage should be released by this function.
1341 */
f2fs_get_new_data_page(struct inode * inode,struct page * ipage,pgoff_t index,bool new_i_size)1342 struct page *f2fs_get_new_data_page(struct inode *inode,
1343 struct page *ipage, pgoff_t index, bool new_i_size)
1344 {
1345 struct address_space *mapping = inode->i_mapping;
1346 struct page *page;
1347 struct dnode_of_data dn;
1348 int err;
1349
1350 page = f2fs_grab_cache_page(mapping, index, true);
1351 if (!page) {
1352 /*
1353 * before exiting, we should make sure ipage will be released
1354 * if any error occur.
1355 */
1356 f2fs_put_page(ipage, 1);
1357 return ERR_PTR(-ENOMEM);
1358 }
1359
1360 set_new_dnode(&dn, inode, ipage, NULL, 0);
1361 err = f2fs_reserve_block(&dn, index);
1362 if (err) {
1363 f2fs_put_page(page, 1);
1364 return ERR_PTR(err);
1365 }
1366 if (!ipage)
1367 f2fs_put_dnode(&dn);
1368
1369 if (PageUptodate(page))
1370 goto got_it;
1371
1372 if (dn.data_blkaddr == NEW_ADDR) {
1373 zero_user_segment(page, 0, PAGE_SIZE);
1374 if (!PageUptodate(page))
1375 SetPageUptodate(page);
1376 } else {
1377 f2fs_put_page(page, 1);
1378
1379 /* if ipage exists, blkaddr should be NEW_ADDR */
1380 f2fs_bug_on(F2FS_I_SB(inode), ipage);
1381 page = f2fs_get_lock_data_page(inode, index, true);
1382 if (IS_ERR(page))
1383 return page;
1384 }
1385 got_it:
1386 if (new_i_size && i_size_read(inode) <
1387 ((loff_t)(index + 1) << PAGE_SHIFT))
1388 f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
1389 return page;
1390 }
1391
__allocate_data_block(struct dnode_of_data * dn,int seg_type)1392 static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
1393 {
1394 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1395 struct f2fs_summary sum;
1396 struct node_info ni;
1397 block_t old_blkaddr;
1398 blkcnt_t count = 1;
1399 int err;
1400
1401 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1402 return -EPERM;
1403
1404 err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
1405 if (err)
1406 return err;
1407
1408 dn->data_blkaddr = f2fs_data_blkaddr(dn);
1409 if (dn->data_blkaddr == NULL_ADDR) {
1410 err = inc_valid_block_count(sbi, dn->inode, &count, true);
1411 if (unlikely(err))
1412 return err;
1413 }
1414
1415 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1416 old_blkaddr = dn->data_blkaddr;
1417 err = f2fs_allocate_data_block(sbi, NULL, old_blkaddr,
1418 &dn->data_blkaddr, &sum, seg_type, NULL);
1419 if (err)
1420 return err;
1421
1422 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
1423 f2fs_invalidate_internal_cache(sbi, old_blkaddr);
1424
1425 f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
1426 return 0;
1427 }
1428
f2fs_map_lock(struct f2fs_sb_info * sbi,int flag)1429 static void f2fs_map_lock(struct f2fs_sb_info *sbi, int flag)
1430 {
1431 if (flag == F2FS_GET_BLOCK_PRE_AIO)
1432 f2fs_down_read(&sbi->node_change);
1433 else
1434 f2fs_lock_op(sbi);
1435 }
1436
f2fs_map_unlock(struct f2fs_sb_info * sbi,int flag)1437 static void f2fs_map_unlock(struct f2fs_sb_info *sbi, int flag)
1438 {
1439 if (flag == F2FS_GET_BLOCK_PRE_AIO)
1440 f2fs_up_read(&sbi->node_change);
1441 else
1442 f2fs_unlock_op(sbi);
1443 }
1444
f2fs_get_block_locked(struct dnode_of_data * dn,pgoff_t index)1445 int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index)
1446 {
1447 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1448 int err = 0;
1449
1450 f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
1451 if (!f2fs_lookup_read_extent_cache_block(dn->inode, index,
1452 &dn->data_blkaddr))
1453 err = f2fs_reserve_block(dn, index);
1454 f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);
1455
1456 return err;
1457 }
1458
f2fs_map_no_dnode(struct inode * inode,struct f2fs_map_blocks * map,struct dnode_of_data * dn,pgoff_t pgoff)1459 static int f2fs_map_no_dnode(struct inode *inode,
1460 struct f2fs_map_blocks *map, struct dnode_of_data *dn,
1461 pgoff_t pgoff)
1462 {
1463 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1464
1465 /*
1466 * There is one exceptional case that read_node_page() may return
1467 * -ENOENT due to filesystem has been shutdown or cp_error, return
1468 * -EIO in that case.
1469 */
1470 if (map->m_may_create &&
1471 (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || f2fs_cp_error(sbi)))
1472 return -EIO;
1473
1474 if (map->m_next_pgofs)
1475 *map->m_next_pgofs = f2fs_get_next_page_offset(dn, pgoff);
1476 if (map->m_next_extent)
1477 *map->m_next_extent = f2fs_get_next_page_offset(dn, pgoff);
1478 return 0;
1479 }
1480
f2fs_map_blocks_cached(struct inode * inode,struct f2fs_map_blocks * map,int flag)1481 static bool f2fs_map_blocks_cached(struct inode *inode,
1482 struct f2fs_map_blocks *map, int flag)
1483 {
1484 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1485 unsigned int maxblocks = map->m_len;
1486 pgoff_t pgoff = (pgoff_t)map->m_lblk;
1487 struct extent_info ei = {};
1488
1489 if (!f2fs_lookup_read_extent_cache(inode, pgoff, &ei))
1490 return false;
1491
1492 map->m_pblk = ei.blk + pgoff - ei.fofs;
1493 map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgoff);
1494 map->m_flags = F2FS_MAP_MAPPED;
1495 if (map->m_next_extent)
1496 *map->m_next_extent = pgoff + map->m_len;
1497
1498 /* for hardware encryption, but to avoid potential issue in future */
1499 if (flag == F2FS_GET_BLOCK_DIO)
1500 f2fs_wait_on_block_writeback_range(inode,
1501 map->m_pblk, map->m_len);
1502
1503 if (f2fs_allow_multi_device_dio(sbi, flag)) {
1504 int bidx = f2fs_target_device_index(sbi, map->m_pblk);
1505 struct f2fs_dev_info *dev = &sbi->devs[bidx];
1506
1507 map->m_bdev = dev->bdev;
1508 map->m_pblk -= dev->start_blk;
1509 map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk);
1510 } else {
1511 map->m_bdev = inode->i_sb->s_bdev;
1512 }
1513 return true;
1514 }
1515
map_is_mergeable(struct f2fs_sb_info * sbi,struct f2fs_map_blocks * map,block_t blkaddr,int flag,int bidx,int ofs)1516 static bool map_is_mergeable(struct f2fs_sb_info *sbi,
1517 struct f2fs_map_blocks *map,
1518 block_t blkaddr, int flag, int bidx,
1519 int ofs)
1520 {
1521 if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
1522 return false;
1523 if (map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs))
1524 return true;
1525 if (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR)
1526 return true;
1527 if (flag == F2FS_GET_BLOCK_PRE_DIO)
1528 return true;
1529 if (flag == F2FS_GET_BLOCK_DIO &&
1530 map->m_pblk == NULL_ADDR && blkaddr == NULL_ADDR)
1531 return true;
1532 return false;
1533 }
1534
1535 /*
1536 * f2fs_map_blocks() tries to find or build mapping relationship which
1537 * maps continuous logical blocks to physical blocks, and return such
1538 * info via f2fs_map_blocks structure.
1539 */
f2fs_map_blocks(struct inode * inode,struct f2fs_map_blocks * map,int flag)1540 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
1541 {
1542 unsigned int maxblocks = map->m_len;
1543 struct dnode_of_data dn;
1544 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1545 int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
1546 pgoff_t pgofs, end_offset, end;
1547 int err = 0, ofs = 1;
1548 unsigned int ofs_in_node, last_ofs_in_node;
1549 blkcnt_t prealloc;
1550 block_t blkaddr;
1551 unsigned int start_pgofs;
1552 int bidx = 0;
1553 bool is_hole;
1554
1555 if (!maxblocks)
1556 return 0;
1557
1558 if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag))
1559 goto out;
1560
1561 map->m_bdev = inode->i_sb->s_bdev;
1562 map->m_multidev_dio =
1563 f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
1564
1565 map->m_len = 0;
1566 map->m_flags = 0;
1567
1568 /* it only supports block size == page size */
1569 pgofs = (pgoff_t)map->m_lblk;
1570 end = pgofs + maxblocks;
1571
1572 next_dnode:
1573 if (map->m_may_create)
1574 f2fs_map_lock(sbi, flag);
1575
1576 /* When reading holes, we need its node page */
1577 set_new_dnode(&dn, inode, NULL, NULL, 0);
1578 err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
1579 if (err) {
1580 if (flag == F2FS_GET_BLOCK_BMAP)
1581 map->m_pblk = 0;
1582 if (err == -ENOENT)
1583 err = f2fs_map_no_dnode(inode, map, &dn, pgofs);
1584 goto unlock_out;
1585 }
1586
1587 start_pgofs = pgofs;
1588 prealloc = 0;
1589 last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
1590 end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1591
1592 next_block:
1593 blkaddr = f2fs_data_blkaddr(&dn);
1594 is_hole = !__is_valid_data_blkaddr(blkaddr);
1595 if (!is_hole &&
1596 !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
1597 err = -EFSCORRUPTED;
1598 goto sync_out;
1599 }
1600
1601 /* use out-place-update for direct IO under LFS mode */
1602 if (map->m_may_create && (is_hole ||
1603 (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
1604 !f2fs_is_pinned_file(inode)))) {
1605 if (unlikely(f2fs_cp_error(sbi))) {
1606 err = -EIO;
1607 goto sync_out;
1608 }
1609
1610 switch (flag) {
1611 case F2FS_GET_BLOCK_PRE_AIO:
1612 if (blkaddr == NULL_ADDR) {
1613 prealloc++;
1614 last_ofs_in_node = dn.ofs_in_node;
1615 }
1616 break;
1617 case F2FS_GET_BLOCK_PRE_DIO:
1618 case F2FS_GET_BLOCK_DIO:
1619 err = __allocate_data_block(&dn, map->m_seg_type);
1620 if (err)
1621 goto sync_out;
1622 if (flag == F2FS_GET_BLOCK_PRE_DIO)
1623 file_need_truncate(inode);
1624 set_inode_flag(inode, FI_APPEND_WRITE);
1625 break;
1626 default:
1627 WARN_ON_ONCE(1);
1628 err = -EIO;
1629 goto sync_out;
1630 }
1631
1632 blkaddr = dn.data_blkaddr;
1633 if (is_hole)
1634 map->m_flags |= F2FS_MAP_NEW;
1635 } else if (is_hole) {
1636 if (f2fs_compressed_file(inode) &&
1637 f2fs_sanity_check_cluster(&dn)) {
1638 err = -EFSCORRUPTED;
1639 f2fs_handle_error(sbi,
1640 ERROR_CORRUPTED_CLUSTER);
1641 goto sync_out;
1642 }
1643
1644 switch (flag) {
1645 case F2FS_GET_BLOCK_PRECACHE:
1646 goto sync_out;
1647 case F2FS_GET_BLOCK_BMAP:
1648 map->m_pblk = 0;
1649 goto sync_out;
1650 case F2FS_GET_BLOCK_FIEMAP:
1651 if (blkaddr == NULL_ADDR) {
1652 if (map->m_next_pgofs)
1653 *map->m_next_pgofs = pgofs + 1;
1654 goto sync_out;
1655 }
1656 break;
1657 case F2FS_GET_BLOCK_DIO:
1658 if (map->m_next_pgofs)
1659 *map->m_next_pgofs = pgofs + 1;
1660 break;
1661 default:
1662 /* for defragment case */
1663 if (map->m_next_pgofs)
1664 *map->m_next_pgofs = pgofs + 1;
1665 goto sync_out;
1666 }
1667 }
1668
1669 if (flag == F2FS_GET_BLOCK_PRE_AIO)
1670 goto skip;
1671
1672 if (map->m_multidev_dio)
1673 bidx = f2fs_target_device_index(sbi, blkaddr);
1674
1675 if (map->m_len == 0) {
1676 /* reserved delalloc block should be mapped for fiemap. */
1677 if (blkaddr == NEW_ADDR)
1678 map->m_flags |= F2FS_MAP_DELALLOC;
1679 if (flag != F2FS_GET_BLOCK_DIO || !is_hole)
1680 map->m_flags |= F2FS_MAP_MAPPED;
1681
1682 map->m_pblk = blkaddr;
1683 map->m_len = 1;
1684
1685 if (map->m_multidev_dio)
1686 map->m_bdev = FDEV(bidx).bdev;
1687 } else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) {
1688 ofs++;
1689 map->m_len++;
1690 } else {
1691 goto sync_out;
1692 }
1693
1694 skip:
1695 dn.ofs_in_node++;
1696 pgofs++;
1697
1698 /* preallocate blocks in batch for one dnode page */
1699 if (flag == F2FS_GET_BLOCK_PRE_AIO &&
1700 (pgofs == end || dn.ofs_in_node == end_offset)) {
1701
1702 dn.ofs_in_node = ofs_in_node;
1703 err = f2fs_reserve_new_blocks(&dn, prealloc);
1704 if (err)
1705 goto sync_out;
1706
1707 map->m_len += dn.ofs_in_node - ofs_in_node;
1708 if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
1709 err = -ENOSPC;
1710 goto sync_out;
1711 }
1712 dn.ofs_in_node = end_offset;
1713 }
1714
1715 if (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
1716 map->m_may_create) {
1717 /* the next block to be allocated may not be contiguous. */
1718 if (GET_SEGOFF_FROM_SEG0(sbi, blkaddr) % BLKS_PER_SEC(sbi) ==
1719 CAP_BLKS_PER_SEC(sbi) - 1)
1720 goto sync_out;
1721 }
1722
1723 if (pgofs >= end)
1724 goto sync_out;
1725 else if (dn.ofs_in_node < end_offset)
1726 goto next_block;
1727
1728 if (flag == F2FS_GET_BLOCK_PRECACHE) {
1729 if (map->m_flags & F2FS_MAP_MAPPED) {
1730 unsigned int ofs = start_pgofs - map->m_lblk;
1731
1732 f2fs_update_read_extent_cache_range(&dn,
1733 start_pgofs, map->m_pblk + ofs,
1734 map->m_len - ofs);
1735 }
1736 }
1737
1738 f2fs_put_dnode(&dn);
1739
1740 if (map->m_may_create) {
1741 f2fs_map_unlock(sbi, flag);
1742 f2fs_balance_fs(sbi, dn.node_changed);
1743 }
1744 goto next_dnode;
1745
1746 sync_out:
1747
1748 if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
1749 /*
1750 * for hardware encryption, but to avoid potential issue
1751 * in future
1752 */
1753 f2fs_wait_on_block_writeback_range(inode,
1754 map->m_pblk, map->m_len);
1755
1756 if (map->m_multidev_dio) {
1757 block_t blk_addr = map->m_pblk;
1758
1759 bidx = f2fs_target_device_index(sbi, map->m_pblk);
1760
1761 map->m_bdev = FDEV(bidx).bdev;
1762 map->m_pblk -= FDEV(bidx).start_blk;
1763
1764 if (map->m_may_create)
1765 f2fs_update_device_state(sbi, inode->i_ino,
1766 blk_addr, map->m_len);
1767
1768 f2fs_bug_on(sbi, blk_addr + map->m_len >
1769 FDEV(bidx).end_blk + 1);
1770 }
1771 }
1772
1773 if (flag == F2FS_GET_BLOCK_PRECACHE) {
1774 if (map->m_flags & F2FS_MAP_MAPPED) {
1775 unsigned int ofs = start_pgofs - map->m_lblk;
1776
1777 f2fs_update_read_extent_cache_range(&dn,
1778 start_pgofs, map->m_pblk + ofs,
1779 map->m_len - ofs);
1780 }
1781 if (map->m_next_extent)
1782 *map->m_next_extent = pgofs + 1;
1783 }
1784 f2fs_put_dnode(&dn);
1785 unlock_out:
1786 if (map->m_may_create) {
1787 f2fs_map_unlock(sbi, flag);
1788 f2fs_balance_fs(sbi, dn.node_changed);
1789 }
1790 out:
1791 trace_f2fs_map_blocks(inode, map, flag, err);
1792 return err;
1793 }
1794
f2fs_overwrite_io(struct inode * inode,loff_t pos,size_t len)1795 bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
1796 {
1797 struct f2fs_map_blocks map;
1798 block_t last_lblk;
1799 int err;
1800
1801 if (pos + len > i_size_read(inode))
1802 return false;
1803
1804 map.m_lblk = F2FS_BYTES_TO_BLK(pos);
1805 map.m_next_pgofs = NULL;
1806 map.m_next_extent = NULL;
1807 map.m_seg_type = NO_CHECK_TYPE;
1808 map.m_may_create = false;
1809 last_lblk = F2FS_BLK_ALIGN(pos + len);
1810
1811 while (map.m_lblk < last_lblk) {
1812 map.m_len = last_lblk - map.m_lblk;
1813 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
1814 if (err || map.m_len == 0)
1815 return false;
1816 map.m_lblk += map.m_len;
1817 }
1818 return true;
1819 }
1820
bytes_to_blks(struct inode * inode,u64 bytes)1821 static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
1822 {
1823 return (bytes >> inode->i_blkbits);
1824 }
1825
blks_to_bytes(struct inode * inode,u64 blks)1826 static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
1827 {
1828 return (blks << inode->i_blkbits);
1829 }
1830
f2fs_xattr_fiemap(struct inode * inode,struct fiemap_extent_info * fieinfo)1831 static int f2fs_xattr_fiemap(struct inode *inode,
1832 struct fiemap_extent_info *fieinfo)
1833 {
1834 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1835 struct page *page;
1836 struct node_info ni;
1837 __u64 phys = 0, len;
1838 __u32 flags;
1839 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
1840 int err = 0;
1841
1842 if (f2fs_has_inline_xattr(inode)) {
1843 int offset;
1844
1845 page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
1846 inode->i_ino, false);
1847 if (!page)
1848 return -ENOMEM;
1849
1850 err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
1851 if (err) {
1852 f2fs_put_page(page, 1);
1853 return err;
1854 }
1855
1856 phys = blks_to_bytes(inode, ni.blk_addr);
1857 offset = offsetof(struct f2fs_inode, i_addr) +
1858 sizeof(__le32) * (DEF_ADDRS_PER_INODE -
1859 get_inline_xattr_addrs(inode));
1860
1861 phys += offset;
1862 len = inline_xattr_size(inode);
1863
1864 f2fs_put_page(page, 1);
1865
1866 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
1867
1868 if (!xnid)
1869 flags |= FIEMAP_EXTENT_LAST;
1870
1871 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1872 trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1873 if (err)
1874 return err;
1875 }
1876
1877 if (xnid) {
1878 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
1879 if (!page)
1880 return -ENOMEM;
1881
1882 err = f2fs_get_node_info(sbi, xnid, &ni, false);
1883 if (err) {
1884 f2fs_put_page(page, 1);
1885 return err;
1886 }
1887
1888 phys = blks_to_bytes(inode, ni.blk_addr);
1889 len = inode->i_sb->s_blocksize;
1890
1891 f2fs_put_page(page, 1);
1892
1893 flags = FIEMAP_EXTENT_LAST;
1894 }
1895
1896 if (phys) {
1897 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1898 trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1899 }
1900
1901 return (err < 0 ? err : 0);
1902 }
1903
1904 static loff_t max_inode_blocks(struct inode *inode)
1905 {
1906 loff_t result = ADDRS_PER_INODE(inode);
1907 loff_t leaf_count = ADDRS_PER_BLOCK(inode);
1908
1909 /* two direct node blocks */
1910 result += (leaf_count * 2);
1911
1912 /* two indirect node blocks */
1913 leaf_count *= NIDS_PER_BLOCK;
1914 result += (leaf_count * 2);
1915
1916 /* one double indirect node block */
1917 leaf_count *= NIDS_PER_BLOCK;
1918 result += leaf_count;
1919
1920 return result;
1921 }
1922
1923 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1924 u64 start, u64 len)
1925 {
1926 struct f2fs_map_blocks map;
1927 sector_t start_blk, last_blk;
1928 pgoff_t next_pgofs;
1929 u64 logical = 0, phys = 0, size = 0;
1930 u32 flags = 0;
1931 int ret = 0;
1932 bool compr_cluster = false, compr_appended;
1933 unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
1934 unsigned int count_in_cluster = 0;
1935 loff_t maxbytes;
1936
1937 if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
1938 ret = f2fs_precache_extents(inode);
1939 if (ret)
1940 return ret;
1941 }
1942
1943 ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
1944 if (ret)
1945 return ret;
1946
1947 inode_lock_shared(inode);
1948
1949 maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
1950 if (start > maxbytes) {
1951 ret = -EFBIG;
1952 goto out;
1953 }
1954
1955 if (len > maxbytes || (maxbytes - len) < start)
1956 len = maxbytes - start;
1957
1958 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
1959 ret = f2fs_xattr_fiemap(inode, fieinfo);
1960 goto out;
1961 }
1962
1963 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
1964 ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
1965 if (ret != -EAGAIN)
1966 goto out;
1967 }
1968
1969 if (bytes_to_blks(inode, len) == 0)
1970 len = blks_to_bytes(inode, 1);
1971
1972 start_blk = bytes_to_blks(inode, start);
1973 last_blk = bytes_to_blks(inode, start + len - 1);
1974
1975 next:
1976 memset(&map, 0, sizeof(map));
1977 map.m_lblk = start_blk;
1978 map.m_len = bytes_to_blks(inode, len);
1979 map.m_next_pgofs = &next_pgofs;
1980 map.m_seg_type = NO_CHECK_TYPE;
1981
1982 if (compr_cluster) {
1983 map.m_lblk += 1;
1984 map.m_len = cluster_size - count_in_cluster;
1985 }
1986
1987 ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
1988 if (ret)
1989 goto out;
1990
1991 /* HOLE */
1992 if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
1993 start_blk = next_pgofs;
1994
1995 if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
1996 max_inode_blocks(inode)))
1997 goto prep_next;
1998
1999 flags |= FIEMAP_EXTENT_LAST;
2000 }
2001
2002 compr_appended = false;
2003 	/* In the case of a compressed cluster, append this to the last extent */
2004 if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) ||
2005 !(map.m_flags & F2FS_MAP_FLAGS))) {
2006 compr_appended = true;
2007 goto skip_fill;
2008 }
2009
2010 if (size) {
2011 flags |= FIEMAP_EXTENT_MERGED;
2012 if (IS_ENCRYPTED(inode))
2013 flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
2014
2015 ret = fiemap_fill_next_extent(fieinfo, logical,
2016 phys, size, flags);
2017 trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
2018 if (ret)
2019 goto out;
2020 size = 0;
2021 }
2022
2023 if (start_blk > last_blk)
2024 goto out;
2025
2026 skip_fill:
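	/*
	 * A cluster whose first slot holds COMPRESS_ADDR is a compressed
	 * cluster; count its blocks so the whole cluster can be reported
	 * as a single encoded extent.
	 */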
2027 if (map.m_pblk == COMPRESS_ADDR) {
2028 compr_cluster = true;
2029 count_in_cluster = 1;
2030 } else if (compr_appended) {
2031 unsigned int appended_blks = cluster_size -
2032 count_in_cluster + 1;
2033 size += blks_to_bytes(inode, appended_blks);
2034 start_blk += appended_blks;
2035 compr_cluster = false;
2036 } else {
2037 logical = blks_to_bytes(inode, start_blk);
2038 phys = __is_valid_data_blkaddr(map.m_pblk) ?
2039 blks_to_bytes(inode, map.m_pblk) : 0;
2040 size = blks_to_bytes(inode, map.m_len);
2041 flags = 0;
2042
2043 if (compr_cluster) {
2044 flags = FIEMAP_EXTENT_ENCODED;
2045 count_in_cluster += map.m_len;
2046 if (count_in_cluster == cluster_size) {
2047 compr_cluster = false;
2048 size += blks_to_bytes(inode, 1);
2049 }
2050 } else if (map.m_flags & F2FS_MAP_DELALLOC) {
2051 flags = FIEMAP_EXTENT_UNWRITTEN;
2052 }
2053
2054 start_blk += bytes_to_blks(inode, size);
2055 }
2056
2057 prep_next:
2058 cond_resched();
2059 if (fatal_signal_pending(current))
2060 ret = -EINTR;
2061 else
2062 goto next;
2063 out:
2064 if (ret == 1)
2065 ret = 0;
2066
2067 inode_unlock_shared(inode);
2068 return ret;
2069 }
2070
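/*
 * For fs-verity files, reads are allowed past i_size (up to the maximum file
 * size) so that the Merkle tree pages stored beyond EOF can be read.
 */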
2071 static inline loff_t f2fs_readpage_limit(struct inode *inode)
2072 {
2073 if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
2074 return F2FS_BLK_TO_BYTES(max_file_blocks(inode));
2075
2076 return i_size_read(inode);
2077 }
2078
2079 static inline blk_opf_t f2fs_ra_op_flags(struct readahead_control *rac)
2080 {
2081 return rac ? REQ_RAHEAD : 0;
2082 }
2083
2084 static int f2fs_read_single_page(struct inode *inode, struct folio *folio,
2085 unsigned nr_pages,
2086 struct f2fs_map_blocks *map,
2087 struct bio **bio_ret,
2088 sector_t *last_block_in_bio,
2089 struct readahead_control *rac)
2090 {
2091 struct bio *bio = *bio_ret;
2092 const unsigned blocksize = blks_to_bytes(inode, 1);
2093 sector_t block_in_file;
2094 sector_t last_block;
2095 sector_t last_block_in_file;
2096 sector_t block_nr;
2097 pgoff_t index = folio_index(folio);
2098 int ret = 0;
2099
2100 block_in_file = (sector_t)index;
2101 last_block = block_in_file + nr_pages;
2102 last_block_in_file = bytes_to_blks(inode,
2103 f2fs_readpage_limit(inode) + blocksize - 1);
2104 if (last_block > last_block_in_file)
2105 last_block = last_block_in_file;
2106
2107 	/* just zero out the page which is beyond EOF */
2108 if (block_in_file >= last_block)
2109 goto zero_out;
2110 /*
2111 * Map blocks using the previous result first.
2112 */
2113 if ((map->m_flags & F2FS_MAP_MAPPED) &&
2114 block_in_file > map->m_lblk &&
2115 block_in_file < (map->m_lblk + map->m_len))
2116 goto got_it;
2117
2118 /*
2119 * Then do more f2fs_map_blocks() calls until we are
2120 * done with this page.
2121 */
2122 map->m_lblk = block_in_file;
2123 map->m_len = last_block - block_in_file;
2124
2125 ret = f2fs_map_blocks(inode, map, F2FS_GET_BLOCK_DEFAULT);
2126 if (ret)
2127 goto out;
2128 got_it:
2129 if ((map->m_flags & F2FS_MAP_MAPPED)) {
2130 block_nr = map->m_pblk + block_in_file - map->m_lblk;
2131 folio_set_mappedtodisk(folio);
2132
2133 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
2134 DATA_GENERIC_ENHANCE_READ)) {
2135 ret = -EFSCORRUPTED;
2136 goto out;
2137 }
2138 } else {
2139 zero_out:
2140 folio_zero_segment(folio, 0, folio_size(folio));
2141 if (f2fs_need_verity(inode, index) &&
2142 !fsverity_verify_folio(folio)) {
2143 ret = -EIO;
2144 goto out;
2145 }
2146 if (!folio_test_uptodate(folio))
2147 folio_mark_uptodate(folio);
2148 folio_unlock(folio);
2149 goto out;
2150 }
2151
2152 /*
2153 * This page will go to BIO. Do we need to send this
2154 * BIO off first?
2155 */
2156 if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
2157 *last_block_in_bio, block_nr) ||
2158 !f2fs_crypt_mergeable_bio(bio, inode, index, NULL))) {
2159 submit_and_realloc:
2160 f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
2161 bio = NULL;
2162 }
2163 if (bio == NULL) {
2164 bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
2165 f2fs_ra_op_flags(rac), index,
2166 false);
2167 if (IS_ERR(bio)) {
2168 ret = PTR_ERR(bio);
2169 bio = NULL;
2170 goto out;
2171 }
2172 }
2173
2174 /*
2175 * If the page is under writeback, we need to wait for
2176 * its completion to see the correct decrypted data.
2177 */
2178 f2fs_wait_on_block_writeback(inode, block_nr);
2179
2180 if (!bio_add_folio(bio, folio, blocksize, 0))
2181 goto submit_and_realloc;
2182
2183 inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
2184 f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
2185 F2FS_BLKSIZE);
2186 *last_block_in_bio = block_nr;
2187 out:
2188 *bio_ret = bio;
2189 return ret;
2190 }
2191
2192 #ifdef CONFIG_F2FS_FS_COMPRESSION
2193 int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
2194 unsigned nr_pages, sector_t *last_block_in_bio,
2195 struct readahead_control *rac, bool for_write)
2196 {
2197 struct dnode_of_data dn;
2198 struct inode *inode = cc->inode;
2199 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2200 struct bio *bio = *bio_ret;
2201 unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
2202 sector_t last_block_in_file;
2203 const unsigned blocksize = blks_to_bytes(inode, 1);
2204 struct decompress_io_ctx *dic = NULL;
2205 struct extent_info ei = {};
2206 bool from_dnode = true;
2207 int i;
2208 int ret = 0;
2209
2210 f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));
2211
2212 last_block_in_file = bytes_to_blks(inode,
2213 f2fs_readpage_limit(inode) + blocksize - 1);
2214
2215 /* get rid of pages beyond EOF */
2216 for (i = 0; i < cc->cluster_size; i++) {
2217 struct page *page = cc->rpages[i];
2218 struct folio *folio;
2219
2220 if (!page)
2221 continue;
2222
2223 folio = page_folio(page);
2224 if ((sector_t)folio->index >= last_block_in_file) {
2225 folio_zero_segment(folio, 0, folio_size(folio));
2226 if (!folio_test_uptodate(folio))
2227 folio_mark_uptodate(folio);
2228 } else if (!folio_test_uptodate(folio)) {
2229 continue;
2230 }
2231 folio_unlock(folio);
2232 if (for_write)
2233 folio_put(folio);
2234 cc->rpages[i] = NULL;
2235 cc->nr_rpages--;
2236 }
2237
2238 /* we are done since all pages are beyond EOF */
2239 if (f2fs_cluster_is_empty(cc))
2240 goto out;
2241
2242 if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei))
2243 from_dnode = false;
2244
2245 if (!from_dnode)
2246 goto skip_reading_dnode;
2247
2248 set_new_dnode(&dn, inode, NULL, NULL, 0);
2249 ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
2250 if (ret)
2251 goto out;
2252
2253 if (unlikely(f2fs_cp_error(sbi))) {
2254 ret = -EIO;
2255 goto out_put_dnode;
2256 }
2257 f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
2258
2259 skip_reading_dnode:
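	/*
	 * Count the compressed blocks (cpages) of this cluster; in the dnode,
	 * slot 0 of a compressed cluster holds the COMPRESS_ADDR marker, so
	 * the compressed block addresses start at slot 1.
	 */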
2260 for (i = 1; i < cc->cluster_size; i++) {
2261 block_t blkaddr;
2262
2263 blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
2264 dn.ofs_in_node + i) :
2265 ei.blk + i - 1;
2266
2267 if (!__is_valid_data_blkaddr(blkaddr))
2268 break;
2269
2270 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
2271 ret = -EFAULT;
2272 goto out_put_dnode;
2273 }
2274 cc->nr_cpages++;
2275
2276 if (!from_dnode && i >= ei.c_len)
2277 break;
2278 }
2279
2280 /* nothing to decompress */
2281 if (cc->nr_cpages == 0) {
2282 ret = 0;
2283 goto out_put_dnode;
2284 }
2285
2286 dic = f2fs_alloc_dic(cc);
2287 if (IS_ERR(dic)) {
2288 ret = PTR_ERR(dic);
2289 goto out_put_dnode;
2290 }
2291
2292 for (i = 0; i < cc->nr_cpages; i++) {
2293 struct folio *folio = page_folio(dic->cpages[i]);
2294 block_t blkaddr;
2295 struct bio_post_read_ctx *ctx;
2296
2297 blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
2298 dn.ofs_in_node + i + 1) :
2299 ei.blk + i;
2300
2301 f2fs_wait_on_block_writeback(inode, blkaddr);
2302
2303 if (f2fs_load_compressed_page(sbi, folio_page(folio, 0),
2304 blkaddr)) {
2305 if (atomic_dec_and_test(&dic->remaining_pages)) {
2306 f2fs_decompress_cluster(dic, true);
2307 break;
2308 }
2309 continue;
2310 }
2311
2312 if (bio && (!page_is_mergeable(sbi, bio,
2313 *last_block_in_bio, blkaddr) ||
2314 !f2fs_crypt_mergeable_bio(bio, inode, folio->index, NULL))) {
2315 submit_and_realloc:
2316 f2fs_submit_read_bio(sbi, bio, DATA);
2317 bio = NULL;
2318 }
2319
2320 if (!bio) {
2321 bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
2322 f2fs_ra_op_flags(rac),
2323 folio->index, for_write);
2324 if (IS_ERR(bio)) {
2325 ret = PTR_ERR(bio);
2326 f2fs_decompress_end_io(dic, ret, true);
2327 f2fs_put_dnode(&dn);
2328 *bio_ret = NULL;
2329 return ret;
2330 }
2331 }
2332
2333 if (!bio_add_folio(bio, folio, blocksize, 0))
2334 goto submit_and_realloc;
2335
2336 ctx = get_post_read_ctx(bio);
2337 ctx->enabled_steps |= STEP_DECOMPRESS;
2338 refcount_inc(&dic->refcnt);
2339
2340 inc_page_count(sbi, F2FS_RD_DATA);
2341 f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE);
2342 *last_block_in_bio = blkaddr;
2343 }
2344
2345 if (from_dnode)
2346 f2fs_put_dnode(&dn);
2347
2348 *bio_ret = bio;
2349 return 0;
2350
2351 out_put_dnode:
2352 if (from_dnode)
2353 f2fs_put_dnode(&dn);
2354 out:
2355 for (i = 0; i < cc->cluster_size; i++) {
2356 if (cc->rpages[i]) {
2357 ClearPageUptodate(cc->rpages[i]);
2358 unlock_page(cc->rpages[i]);
2359 }
2360 }
2361 *bio_ret = bio;
2362 return ret;
2363 }
2364 #endif
2365
2366 /*
2367 * This function was originally taken from fs/mpage.c, and customized for f2fs.
2368  * The major change comes from block_size == page_size being the default in f2fs.
2369 */
2370 static int f2fs_mpage_readpages(struct inode *inode,
2371 struct readahead_control *rac, struct folio *folio)
2372 {
2373 struct bio *bio = NULL;
2374 sector_t last_block_in_bio = 0;
2375 struct f2fs_map_blocks map;
2376 #ifdef CONFIG_F2FS_FS_COMPRESSION
2377 struct compress_ctx cc = {
2378 .inode = inode,
2379 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
2380 .cluster_size = F2FS_I(inode)->i_cluster_size,
2381 .cluster_idx = NULL_CLUSTER,
2382 .rpages = NULL,
2383 .cpages = NULL,
2384 .nr_rpages = 0,
2385 .nr_cpages = 0,
2386 };
2387 pgoff_t nc_cluster_idx = NULL_CLUSTER;
2388 #endif
2389 unsigned nr_pages = rac ? readahead_count(rac) : 1;
2390 unsigned max_nr_pages = nr_pages;
2391 pgoff_t index;
2392 int ret = 0;
2393
2394 map.m_pblk = 0;
2395 map.m_lblk = 0;
2396 map.m_len = 0;
2397 map.m_flags = 0;
2398 map.m_next_pgofs = NULL;
2399 map.m_next_extent = NULL;
2400 map.m_seg_type = NO_CHECK_TYPE;
2401 map.m_may_create = false;
2402
2403 for (; nr_pages; nr_pages--) {
2404 if (rac) {
2405 folio = readahead_folio(rac);
2406 prefetchw(&folio->flags);
2407 }
2408
2409 index = folio_index(folio);
2410
2411 #ifdef CONFIG_F2FS_FS_COMPRESSION
2412 if (!f2fs_compressed_file(inode))
2413 goto read_single_page;
2414
2415 		/* there are remaining compressed pages; submit them */
2416 if (!f2fs_cluster_can_merge_page(&cc, index)) {
2417 ret = f2fs_read_multi_pages(&cc, &bio,
2418 max_nr_pages,
2419 &last_block_in_bio,
2420 rac, false);
2421 f2fs_destroy_compress_ctx(&cc, false);
2422 if (ret)
2423 goto set_error_page;
2424 }
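		/*
		 * nc_cluster_idx caches the last cluster known to be
		 * uncompressed, so the remaining pages of that cluster can
		 * skip the compressed-cluster lookup.
		 */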
2425 if (cc.cluster_idx == NULL_CLUSTER) {
2426 if (nc_cluster_idx == index >> cc.log_cluster_size)
2427 goto read_single_page;
2428
2429 ret = f2fs_is_compressed_cluster(inode, index);
2430 if (ret < 0)
2431 goto set_error_page;
2432 else if (!ret) {
2433 nc_cluster_idx =
2434 index >> cc.log_cluster_size;
2435 goto read_single_page;
2436 }
2437
2438 nc_cluster_idx = NULL_CLUSTER;
2439 }
2440 ret = f2fs_init_compress_ctx(&cc);
2441 if (ret)
2442 goto set_error_page;
2443
2444 f2fs_compress_ctx_add_page(&cc, folio);
2445
2446 goto next_page;
2447 read_single_page:
2448 #endif
2449
2450 ret = f2fs_read_single_page(inode, folio, max_nr_pages, &map,
2451 &bio, &last_block_in_bio, rac);
2452 if (ret) {
2453 #ifdef CONFIG_F2FS_FS_COMPRESSION
2454 set_error_page:
2455 #endif
2456 folio_zero_segment(folio, 0, folio_size(folio));
2457 folio_unlock(folio);
2458 }
2459 #ifdef CONFIG_F2FS_FS_COMPRESSION
2460 next_page:
2461 #endif
2462
2463 #ifdef CONFIG_F2FS_FS_COMPRESSION
2464 if (f2fs_compressed_file(inode)) {
2465 /* last page */
2466 if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
2467 ret = f2fs_read_multi_pages(&cc, &bio,
2468 max_nr_pages,
2469 &last_block_in_bio,
2470 rac, false);
2471 f2fs_destroy_compress_ctx(&cc, false);
2472 }
2473 }
2474 #endif
2475 }
2476 if (bio)
2477 f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
2478 return ret;
2479 }
2480
2481 static int f2fs_read_data_folio(struct file *file, struct folio *folio)
2482 {
2483 struct inode *inode = folio_file_mapping(folio)->host;
2484 int ret = -EAGAIN;
2485
2486 trace_f2fs_readpage(folio, DATA);
2487
2488 if (!f2fs_is_compress_backend_ready(inode)) {
2489 folio_unlock(folio);
2490 return -EOPNOTSUPP;
2491 }
2492
2493 /* If the file has inline data, try to read it directly */
2494 if (f2fs_has_inline_data(inode))
2495 ret = f2fs_read_inline_data(inode, folio);
2496 if (ret == -EAGAIN)
2497 ret = f2fs_mpage_readpages(inode, NULL, folio);
2498 return ret;
2499 }
2500
2501 static void f2fs_readahead(struct readahead_control *rac)
2502 {
2503 struct inode *inode = rac->mapping->host;
2504
2505 trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));
2506
2507 if (!f2fs_is_compress_backend_ready(inode))
2508 return;
2509
2510 /* If the file has inline data, skip readahead */
2511 if (f2fs_has_inline_data(inode))
2512 return;
2513
2514 f2fs_mpage_readpages(inode, rac, NULL);
2515 }
2516
2517 int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
2518 {
2519 struct inode *inode = fio->page->mapping->host;
2520 struct page *mpage, *page;
2521 gfp_t gfp_flags = GFP_NOFS;
2522
2523 if (!f2fs_encrypted_file(inode))
2524 return 0;
2525
2526 page = fio->compressed_page ? fio->compressed_page : fio->page;
2527
2528 if (fscrypt_inode_uses_inline_crypto(inode))
2529 return 0;
2530
2531 retry_encrypt:
2532 fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
2533 PAGE_SIZE, 0, gfp_flags);
2534 if (IS_ERR(fio->encrypted_page)) {
2535 /* flush pending IOs and wait for a while in the ENOMEM case */
2536 if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
2537 f2fs_flush_merged_writes(fio->sbi);
2538 memalloc_retry_wait(GFP_NOFS);
2539 gfp_flags |= __GFP_NOFAIL;
2540 goto retry_encrypt;
2541 }
2542 return PTR_ERR(fio->encrypted_page);
2543 }
2544
2545 mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
2546 if (mpage) {
2547 if (PageUptodate(mpage))
2548 memcpy(page_address(mpage),
2549 page_address(fio->encrypted_page), PAGE_SIZE);
2550 f2fs_put_page(mpage, 1);
2551 }
2552 return 0;
2553 }
2554
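/* Check whether the mount's IPU policy allows an in-place update for this write. */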
2555 static inline bool check_inplace_update_policy(struct inode *inode,
2556 struct f2fs_io_info *fio)
2557 {
2558 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2559
2560 if (IS_F2FS_IPU_HONOR_OPU_WRITE(sbi) &&
2561 is_inode_flag_set(inode, FI_OPU_WRITE))
2562 return false;
2563 if (IS_F2FS_IPU_FORCE(sbi))
2564 return true;
2565 if (IS_F2FS_IPU_SSR(sbi) && f2fs_need_SSR(sbi))
2566 return true;
2567 if (IS_F2FS_IPU_UTIL(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util)
2568 return true;
2569 if (IS_F2FS_IPU_SSR_UTIL(sbi) && f2fs_need_SSR(sbi) &&
2570 utilization(sbi) > SM_I(sbi)->min_ipu_util)
2571 return true;
2572
2573 /*
2574 	 * use IPU when rewriting async (non-sync) pages
2575 */
2576 if (IS_F2FS_IPU_ASYNC(sbi) && fio && fio->op == REQ_OP_WRITE &&
2577 !(fio->op_flags & REQ_SYNC) && !IS_ENCRYPTED(inode))
2578 return true;
2579
2580 /* this is only set during fdatasync */
2581 if (IS_F2FS_IPU_FSYNC(sbi) && is_inode_flag_set(inode, FI_NEED_IPU))
2582 return true;
2583
2584 if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2585 !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2586 return true;
2587
2588 return false;
2589 }
2590
2591 bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
2592 {
2593 /* swap file is migrating in aligned write mode */
2594 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2595 return false;
2596
2597 if (f2fs_is_pinned_file(inode))
2598 return true;
2599
2600 	/* if this is a cold file, overwrite in place to avoid fragmentation */
2601 if (file_is_cold(inode) && !is_inode_flag_set(inode, FI_OPU_WRITE))
2602 return true;
2603
2604 return check_inplace_update_policy(inode, fio);
2605 }
2606
2607 bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
2608 {
2609 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2610
2611 /* The below cases were checked when setting it. */
2612 if (f2fs_is_pinned_file(inode))
2613 return false;
2614 if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
2615 return true;
2616 if (f2fs_lfs_mode(sbi))
2617 return true;
2618 if (S_ISDIR(inode->i_mode))
2619 return true;
2620 if (IS_NOQUOTA(inode))
2621 return true;
2622 if (f2fs_used_in_atomic_write(inode))
2623 return true;
2624 	/* rewrite low-ratio compressed data with OPU mode to avoid fragmentation */
2625 if (f2fs_compressed_file(inode) &&
2626 F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER &&
2627 is_inode_flag_set(inode, FI_ENABLE_COMPRESS))
2628 return true;
2629
2630 /* swap file is migrating in aligned write mode */
2631 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2632 return true;
2633
2634 if (is_inode_flag_set(inode, FI_OPU_WRITE))
2635 return true;
2636
2637 if (fio) {
2638 if (page_private_gcing(fio->page))
2639 return true;
2640 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2641 f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2642 return true;
2643 }
2644 return false;
2645 }
2646
2647 static inline bool need_inplace_update(struct f2fs_io_info *fio)
2648 {
2649 struct inode *inode = fio->page->mapping->host;
2650
2651 if (f2fs_should_update_outplace(inode, fio))
2652 return false;
2653
2654 return f2fs_should_update_inplace(inode, fio);
2655 }
2656
2657 int f2fs_do_write_data_page(struct f2fs_io_info *fio)
2658 {
2659 struct folio *folio = page_folio(fio->page);
2660 struct inode *inode = folio->mapping->host;
2661 struct dnode_of_data dn;
2662 struct node_info ni;
2663 bool ipu_force = false;
2664 bool atomic_commit;
2665 int err = 0;
2666
2667 /* Use COW inode to make dnode_of_data for atomic write */
2668 atomic_commit = f2fs_is_atomic_file(inode) &&
2669 page_private_atomic(folio_page(folio, 0));
2670 if (atomic_commit)
2671 set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
2672 else
2673 set_new_dnode(&dn, inode, NULL, NULL, 0);
2674
2675 if (need_inplace_update(fio) &&
2676 f2fs_lookup_read_extent_cache_block(inode, folio->index,
2677 &fio->old_blkaddr)) {
2678 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2679 DATA_GENERIC_ENHANCE))
2680 return -EFSCORRUPTED;
2681
2682 ipu_force = true;
2683 fio->need_lock = LOCK_DONE;
2684 goto got_it;
2685 }
2686
2687 	/* avoid deadlock between page lock and f2fs_lock_op() */
2688 if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
2689 return -EAGAIN;
2690
2691 err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE);
2692 if (err)
2693 goto out;
2694
2695 fio->old_blkaddr = dn.data_blkaddr;
2696
2697 /* This page is already truncated */
2698 if (fio->old_blkaddr == NULL_ADDR) {
2699 folio_clear_uptodate(folio);
2700 clear_page_private_gcing(folio_page(folio, 0));
2701 goto out_writepage;
2702 }
2703 got_it:
2704 if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2705 !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2706 DATA_GENERIC_ENHANCE)) {
2707 err = -EFSCORRUPTED;
2708 goto out_writepage;
2709 }
2710
2711 /* wait for GCed page writeback via META_MAPPING */
2712 if (fio->meta_gc)
2713 f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
2714
2715 /*
2716 	 * If the current allocation needs SSR, in-place writes are
2717 	 * preferred for the updated data.
2718 */
2719 if (ipu_force ||
2720 (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2721 need_inplace_update(fio))) {
2722 err = f2fs_encrypt_one_page(fio);
2723 if (err)
2724 goto out_writepage;
2725
2726 folio_start_writeback(folio);
2727 f2fs_put_dnode(&dn);
2728 if (fio->need_lock == LOCK_REQ)
2729 f2fs_unlock_op(fio->sbi);
2730 err = f2fs_inplace_write_data(fio);
2731 if (err) {
2732 if (fscrypt_inode_uses_fs_layer_crypto(inode))
2733 fscrypt_finalize_bounce_page(&fio->encrypted_page);
2734 folio_end_writeback(folio);
2735 } else {
2736 set_inode_flag(inode, FI_UPDATE_WRITE);
2737 }
2738 trace_f2fs_do_write_data_page(folio, IPU);
2739 return err;
2740 }
2741
2742 if (fio->need_lock == LOCK_RETRY) {
2743 if (!f2fs_trylock_op(fio->sbi)) {
2744 err = -EAGAIN;
2745 goto out_writepage;
2746 }
2747 fio->need_lock = LOCK_REQ;
2748 }
2749
2750 err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
2751 if (err)
2752 goto out_writepage;
2753
2754 fio->version = ni.version;
2755
2756 err = f2fs_encrypt_one_page(fio);
2757 if (err)
2758 goto out_writepage;
2759
2760 folio_start_writeback(folio);
2761
2762 if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
2763 f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
2764
2765 /* LFS mode write path */
2766 f2fs_outplace_write_data(&dn, fio);
2767 trace_f2fs_do_write_data_page(folio, OPU);
2768 set_inode_flag(inode, FI_APPEND_WRITE);
2769 if (atomic_commit)
2770 clear_page_private_atomic(folio_page(folio, 0));
2771 out_writepage:
2772 f2fs_put_dnode(&dn);
2773 out:
2774 if (fio->need_lock == LOCK_REQ)
2775 f2fs_unlock_op(fio->sbi);
2776 return err;
2777 }
2778
2779 int f2fs_write_single_data_page(struct folio *folio, int *submitted,
2780 struct bio **bio,
2781 sector_t *last_block,
2782 struct writeback_control *wbc,
2783 enum iostat_type io_type,
2784 int compr_blocks,
2785 bool allow_balance)
2786 {
2787 struct inode *inode = folio->mapping->host;
2788 struct page *page = folio_page(folio, 0);
2789 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2790 loff_t i_size = i_size_read(inode);
2791 const pgoff_t end_index = ((unsigned long long)i_size)
2792 >> PAGE_SHIFT;
2793 loff_t psize = (loff_t)(folio->index + 1) << PAGE_SHIFT;
2794 unsigned offset = 0;
2795 bool need_balance_fs = false;
2796 bool quota_inode = IS_NOQUOTA(inode);
2797 int err = 0;
2798 struct f2fs_io_info fio = {
2799 .sbi = sbi,
2800 .ino = inode->i_ino,
2801 .type = DATA,
2802 .op = REQ_OP_WRITE,
2803 .op_flags = wbc_to_write_flags(wbc),
2804 .old_blkaddr = NULL_ADDR,
2805 .page = page,
2806 .encrypted_page = NULL,
2807 .submitted = 0,
2808 .compr_blocks = compr_blocks,
2809 .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
2810 .meta_gc = f2fs_meta_inode_gc_required(inode) ? 1 : 0,
2811 .io_type = io_type,
2812 .io_wbc = wbc,
2813 .bio = bio,
2814 .last_block = last_block,
2815 };
2816
2817 trace_f2fs_writepage(folio, DATA);
2818
2819 	/* bypass data pages so that the kworker jobs can proceed */
2820 if (unlikely(f2fs_cp_error(sbi))) {
2821 mapping_set_error(folio->mapping, -EIO);
2822 /*
2823 		 * don't drop any dirty dentry pages, to keep the latest
2824 		 * directory structure.
2825 */
2826 if (S_ISDIR(inode->i_mode) &&
2827 !is_sbi_flag_set(sbi, SBI_IS_CLOSE))
2828 goto redirty_out;
2829
2830 /* keep data pages in remount-ro mode */
2831 if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
2832 goto redirty_out;
2833 goto out;
2834 }
2835
2836 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
2837 goto redirty_out;
2838
2839 if (folio->index < end_index ||
2840 f2fs_verity_in_progress(inode) ||
2841 compr_blocks)
2842 goto write;
2843
2844 /*
2845 	 * If the offset is beyond the file size,
2846 * this page does not have to be written to disk.
2847 */
2848 offset = i_size & (PAGE_SIZE - 1);
2849 if ((folio->index >= end_index + 1) || !offset)
2850 goto out;
2851
2852 folio_zero_segment(folio, offset, folio_size(folio));
2853 write:
2854 /* Dentry/quota blocks are controlled by checkpoint */
2855 if (S_ISDIR(inode->i_mode) || quota_inode) {
2856 /*
2857 * We need to wait for node_write to avoid block allocation during
2858 		 * checkpoint. This can only happen with quota writes, which
2859 		 * could otherwise race with discard.
2860 */
2861 if (quota_inode)
2862 f2fs_down_read(&sbi->node_write);
2863
2864 fio.need_lock = LOCK_DONE;
2865 err = f2fs_do_write_data_page(&fio);
2866
2867 if (quota_inode)
2868 f2fs_up_read(&sbi->node_write);
2869
2870 goto done;
2871 }
2872
2873 if (!wbc->for_reclaim)
2874 need_balance_fs = true;
2875 else if (has_not_enough_free_secs(sbi, 0, 0))
2876 goto redirty_out;
2877 else
2878 set_inode_flag(inode, FI_HOT_DATA);
2879
2880 err = -EAGAIN;
2881 if (f2fs_has_inline_data(inode)) {
2882 err = f2fs_write_inline_data(inode, folio);
2883 if (!err)
2884 goto out;
2885 }
2886
2887 if (err == -EAGAIN) {
2888 err = f2fs_do_write_data_page(&fio);
2889 if (err == -EAGAIN) {
2890 f2fs_bug_on(sbi, compr_blocks);
2891 fio.need_lock = LOCK_REQ;
2892 err = f2fs_do_write_data_page(&fio);
2893 }
2894 }
2895
2896 if (err) {
2897 file_set_keep_isize(inode);
2898 } else {
2899 spin_lock(&F2FS_I(inode)->i_size_lock);
2900 if (F2FS_I(inode)->last_disk_size < psize)
2901 F2FS_I(inode)->last_disk_size = psize;
2902 spin_unlock(&F2FS_I(inode)->i_size_lock);
2903 }
2904
2905 done:
2906 if (err && err != -ENOENT)
2907 goto redirty_out;
2908
2909 out:
2910 inode_dec_dirty_pages(inode);
2911 if (err) {
2912 folio_clear_uptodate(folio);
2913 clear_page_private_gcing(page);
2914 }
2915
2916 if (wbc->for_reclaim) {
2917 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
2918 clear_inode_flag(inode, FI_HOT_DATA);
2919 f2fs_remove_dirty_inode(inode);
2920 submitted = NULL;
2921 }
2922 folio_unlock(folio);
2923 if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
2924 !F2FS_I(inode)->wb_task && allow_balance)
2925 f2fs_balance_fs(sbi, need_balance_fs);
2926
2927 if (unlikely(f2fs_cp_error(sbi))) {
2928 f2fs_submit_merged_write(sbi, DATA);
2929 if (bio && *bio)
2930 f2fs_submit_merged_ipu_write(sbi, bio, NULL);
2931 submitted = NULL;
2932 }
2933
2934 if (submitted)
2935 *submitted = fio.submitted;
2936
2937 return 0;
2938
2939 redirty_out:
2940 folio_redirty_for_writepage(wbc, folio);
2941 /*
2942 	 * pageout() in MM translates EAGAIN into a call to handle_write_error()
2943 	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
2944 	 * file_write_and_wait_range() will then see the EIO error, which is
2945 	 * critical for fsync() to report the atomic_write failure to the user.
2946 */
2947 if (!err || wbc->for_reclaim)
2948 return AOP_WRITEPAGE_ACTIVATE;
2949 folio_unlock(folio);
2950 return err;
2951 }
2952
2953 static int f2fs_write_data_page(struct page *page,
2954 struct writeback_control *wbc)
2955 {
2956 struct folio *folio = page_folio(page);
2957 #ifdef CONFIG_F2FS_FS_COMPRESSION
2958 struct inode *inode = folio->mapping->host;
2959
2960 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
2961 goto out;
2962
2963 if (f2fs_compressed_file(inode)) {
2964 if (f2fs_is_compressed_cluster(inode, folio->index)) {
2965 folio_redirty_for_writepage(wbc, folio);
2966 return AOP_WRITEPAGE_ACTIVATE;
2967 }
2968 }
2969 out:
2970 #endif
2971
2972 return f2fs_write_single_data_page(folio, NULL, NULL, NULL,
2973 wbc, FS_DATA_IO, 0, true);
2974 }
2975
2976 /*
2977 * This function was copied from write_cache_pages from mm/page-writeback.c.
2978  * The major change is that cold data pages are written in a separate
2979  * step from warm/hot data pages.
2980 */
2981 static int f2fs_write_cache_pages(struct address_space *mapping,
2982 struct writeback_control *wbc,
2983 enum iostat_type io_type)
2984 {
2985 int ret = 0;
2986 int done = 0, retry = 0;
2987 struct page *pages_local[F2FS_ONSTACK_PAGES];
2988 struct page **pages = pages_local;
2989 struct folio_batch fbatch;
2990 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
2991 struct bio *bio = NULL;
2992 sector_t last_block;
2993 #ifdef CONFIG_F2FS_FS_COMPRESSION
2994 struct inode *inode = mapping->host;
2995 struct compress_ctx cc = {
2996 .inode = inode,
2997 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
2998 .cluster_size = F2FS_I(inode)->i_cluster_size,
2999 .cluster_idx = NULL_CLUSTER,
3000 .rpages = NULL,
3001 .nr_rpages = 0,
3002 .cpages = NULL,
3003 .valid_nr_cpages = 0,
3004 .rbuf = NULL,
3005 .cbuf = NULL,
3006 .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
3007 .private = NULL,
3008 };
3009 #endif
3010 int nr_folios, p, idx;
3011 int nr_pages;
3012 unsigned int max_pages = F2FS_ONSTACK_PAGES;
3013 pgoff_t index;
3014 pgoff_t end; /* Inclusive */
3015 pgoff_t done_index;
3016 int range_whole = 0;
3017 xa_mark_t tag;
3018 int nwritten = 0;
3019 int submitted = 0;
3020 int i;
3021
3022 #ifdef CONFIG_F2FS_FS_COMPRESSION
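	/*
	 * A compressed cluster larger than the on-stack array needs a
	 * heap-allocated page array that can hold one full cluster.
	 */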
3023 if (f2fs_compressed_file(inode) &&
3024 1 << cc.log_cluster_size > F2FS_ONSTACK_PAGES) {
3025 pages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
3026 cc.log_cluster_size, GFP_NOFS | __GFP_NOFAIL);
3027 max_pages = 1 << cc.log_cluster_size;
3028 }
3029 #endif
3030
3031 folio_batch_init(&fbatch);
3032
3033 if (get_dirty_pages(mapping->host) <=
3034 SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
3035 set_inode_flag(mapping->host, FI_HOT_DATA);
3036 else
3037 clear_inode_flag(mapping->host, FI_HOT_DATA);
3038
3039 if (wbc->range_cyclic) {
3040 index = mapping->writeback_index; /* prev offset */
3041 end = -1;
3042 } else {
3043 index = wbc->range_start >> PAGE_SHIFT;
3044 end = wbc->range_end >> PAGE_SHIFT;
3045 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
3046 range_whole = 1;
3047 }
3048 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
3049 tag = PAGECACHE_TAG_TOWRITE;
3050 else
3051 tag = PAGECACHE_TAG_DIRTY;
3052 retry:
3053 retry = 0;
3054 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
3055 tag_pages_for_writeback(mapping, index, end);
3056 done_index = index;
3057 while (!done && !retry && (index <= end)) {
3058 nr_pages = 0;
3059 again:
3060 nr_folios = filemap_get_folios_tag(mapping, &index, end,
3061 tag, &fbatch);
3062 if (nr_folios == 0) {
3063 if (nr_pages)
3064 goto write;
3065 break;
3066 }
3067
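		/*
		 * Flatten the folio batch into the page array, stopping early
		 * once max_pages entries have been collected.
		 */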
3068 for (i = 0; i < nr_folios; i++) {
3069 struct folio *folio = fbatch.folios[i];
3070
3071 idx = 0;
3072 p = folio_nr_pages(folio);
3073 add_more:
3074 pages[nr_pages] = folio_page(folio, idx);
3075 folio_get(folio);
3076 if (++nr_pages == max_pages) {
3077 index = folio->index + idx + 1;
3078 folio_batch_release(&fbatch);
3079 goto write;
3080 }
3081 if (++idx < p)
3082 goto add_more;
3083 }
3084 folio_batch_release(&fbatch);
3085 goto again;
3086 write:
3087 for (i = 0; i < nr_pages; i++) {
3088 struct page *page = pages[i];
3089 struct folio *folio = page_folio(page);
3090 bool need_readd;
3091 readd:
3092 need_readd = false;
3093 #ifdef CONFIG_F2FS_FS_COMPRESSION
3094 if (f2fs_compressed_file(inode)) {
3095 void *fsdata = NULL;
3096 struct page *pagep;
3097 int ret2;
3098
3099 ret = f2fs_init_compress_ctx(&cc);
3100 if (ret) {
3101 done = 1;
3102 break;
3103 }
3104
3105 if (!f2fs_cluster_can_merge_page(&cc,
3106 folio->index)) {
3107 ret = f2fs_write_multi_pages(&cc,
3108 &submitted, wbc, io_type);
3109 if (!ret)
3110 need_readd = true;
3111 goto result;
3112 }
3113
3114 if (unlikely(f2fs_cp_error(sbi)))
3115 goto lock_folio;
3116
3117 if (!f2fs_cluster_is_empty(&cc))
3118 goto lock_folio;
3119
3120 if (f2fs_all_cluster_page_ready(&cc,
3121 pages, i, nr_pages, true))
3122 goto lock_folio;
3123
3124 ret2 = f2fs_prepare_compress_overwrite(
3125 inode, &pagep,
3126 folio->index, &fsdata);
3127 if (ret2 < 0) {
3128 ret = ret2;
3129 done = 1;
3130 break;
3131 } else if (ret2 &&
3132 (!f2fs_compress_write_end(inode,
3133 fsdata, folio->index, 1) ||
3134 !f2fs_all_cluster_page_ready(&cc,
3135 pages, i, nr_pages,
3136 false))) {
3137 retry = 1;
3138 break;
3139 }
3140 }
3141 #endif
3142 			/* give priority to WB_SYNC threads */
3143 if (atomic_read(&sbi->wb_sync_req[DATA]) &&
3144 wbc->sync_mode == WB_SYNC_NONE) {
3145 done = 1;
3146 break;
3147 }
3148 #ifdef CONFIG_F2FS_FS_COMPRESSION
3149 lock_folio:
3150 #endif
3151 done_index = folio->index;
3152 retry_write:
3153 folio_lock(folio);
3154
3155 if (unlikely(folio->mapping != mapping)) {
3156 continue_unlock:
3157 folio_unlock(folio);
3158 continue;
3159 }
3160
3161 if (!folio_test_dirty(folio)) {
3162 /* someone wrote it for us */
3163 goto continue_unlock;
3164 }
3165
3166 if (folio_test_writeback(folio)) {
3167 if (wbc->sync_mode == WB_SYNC_NONE)
3168 goto continue_unlock;
3169 f2fs_wait_on_page_writeback(&folio->page, DATA, true, true);
3170 }
3171
3172 if (!folio_clear_dirty_for_io(folio))
3173 goto continue_unlock;
3174
3175 #ifdef CONFIG_F2FS_FS_COMPRESSION
3176 if (f2fs_compressed_file(inode)) {
3177 folio_get(folio);
3178 f2fs_compress_ctx_add_page(&cc, folio);
3179 continue;
3180 }
3181 #endif
3182 ret = f2fs_write_single_data_page(folio,
3183 &submitted, &bio, &last_block,
3184 wbc, io_type, 0, true);
3185 if (ret == AOP_WRITEPAGE_ACTIVATE)
3186 folio_unlock(folio);
3187 #ifdef CONFIG_F2FS_FS_COMPRESSION
3188 result:
3189 #endif
3190 nwritten += submitted;
3191 wbc->nr_to_write -= submitted;
3192
3193 if (unlikely(ret)) {
3194 /*
3195 * keep nr_to_write, since vfs uses this to
3196 * get # of written pages.
3197 */
3198 if (ret == AOP_WRITEPAGE_ACTIVATE) {
3199 ret = 0;
3200 goto next;
3201 } else if (ret == -EAGAIN) {
3202 ret = 0;
3203 if (wbc->sync_mode == WB_SYNC_ALL) {
3204 f2fs_io_schedule_timeout(
3205 DEFAULT_IO_TIMEOUT);
3206 goto retry_write;
3207 }
3208 goto next;
3209 }
3210 done_index = folio_next_index(folio);
3211 done = 1;
3212 break;
3213 }
3214
3215 if (wbc->nr_to_write <= 0 &&
3216 wbc->sync_mode == WB_SYNC_NONE) {
3217 done = 1;
3218 break;
3219 }
3220 next:
3221 if (need_readd)
3222 goto readd;
3223 }
3224 release_pages(pages, nr_pages);
3225 cond_resched();
3226 }
3227 #ifdef CONFIG_F2FS_FS_COMPRESSION
3228 	/* flush remaining pages in the compress cluster */
3229 if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
3230 ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
3231 nwritten += submitted;
3232 wbc->nr_to_write -= submitted;
3233 if (ret) {
3234 done = 1;
3235 retry = 0;
3236 }
3237 }
3238 if (f2fs_compressed_file(inode))
3239 f2fs_destroy_compress_ctx(&cc, false);
3240 #endif
3241 if (retry) {
3242 index = 0;
3243 end = -1;
3244 goto retry;
3245 }
3246 if (wbc->range_cyclic && !done)
3247 done_index = 0;
3248 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
3249 mapping->writeback_index = done_index;
3250
3251 if (nwritten)
3252 f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
3253 NULL, 0, DATA);
3254 /* submit cached bio of IPU write */
3255 if (bio)
3256 f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
3257
3258 #ifdef CONFIG_F2FS_FS_COMPRESSION
3259 if (pages != pages_local)
3260 kfree(pages);
3261 #endif
3262
3263 return ret;
3264 }
3265
3266 static inline bool __should_serialize_io(struct inode *inode,
3267 struct writeback_control *wbc)
3268 {
3269 /* to avoid deadlock in path of data flush */
3270 if (F2FS_I(inode)->wb_task)
3271 return false;
3272
3273 if (!S_ISREG(inode->i_mode))
3274 return false;
3275 if (IS_NOQUOTA(inode))
3276 return false;
3277
3278 if (f2fs_need_compress_data(inode))
3279 return true;
3280 if (wbc->sync_mode != WB_SYNC_ALL)
3281 return true;
3282 if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
3283 return true;
3284 return false;
3285 }
3286
3287 static int __f2fs_write_data_pages(struct address_space *mapping,
3288 struct writeback_control *wbc,
3289 enum iostat_type io_type)
3290 {
3291 struct inode *inode = mapping->host;
3292 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3293 struct blk_plug plug;
3294 int ret;
3295 bool locked = false;
3296
3297 	/* deal with chardevs and other special files */
3298 if (!mapping->a_ops->writepage)
3299 return 0;
3300
3301 /* skip writing if there is no dirty page in this inode */
3302 if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
3303 return 0;
3304
3305 /* during POR, we don't need to trigger writepage at all. */
3306 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
3307 goto skip_write;
3308
3309 if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
3310 wbc->sync_mode == WB_SYNC_NONE &&
3311 get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
3312 f2fs_available_free_memory(sbi, DIRTY_DENTS))
3313 goto skip_write;
3314
3315 	/* skip writing during the file defragment preparation stage */
3316 if (is_inode_flag_set(inode, FI_SKIP_WRITES))
3317 goto skip_write;
3318
3319 trace_f2fs_writepages(mapping->host, wbc, DATA);
3320
3321 	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
3322 if (wbc->sync_mode == WB_SYNC_ALL)
3323 atomic_inc(&sbi->wb_sync_req[DATA]);
3324 else if (atomic_read(&sbi->wb_sync_req[DATA])) {
3325 /* to avoid potential deadlock */
3326 if (current->plug)
3327 blk_finish_plug(current->plug);
3328 goto skip_write;
3329 }
3330
3331 if (__should_serialize_io(inode, wbc)) {
3332 mutex_lock(&sbi->writepages);
3333 locked = true;
3334 }
3335
3336 blk_start_plug(&plug);
3337 ret = f2fs_write_cache_pages(mapping, wbc, io_type);
3338 blk_finish_plug(&plug);
3339
3340 if (locked)
3341 mutex_unlock(&sbi->writepages);
3342
3343 if (wbc->sync_mode == WB_SYNC_ALL)
3344 atomic_dec(&sbi->wb_sync_req[DATA]);
3345 /*
3346 	 * if some pages were truncated, we cannot guarantee that
3347 	 * mapping->host can detect pending bios.
3348 */
3349
3350 f2fs_remove_dirty_inode(inode);
3351 return ret;
3352
3353 skip_write:
3354 wbc->pages_skipped += get_dirty_pages(inode);
3355 trace_f2fs_writepages(mapping->host, wbc, DATA);
3356 return 0;
3357 }
3358
3359 static int f2fs_write_data_pages(struct address_space *mapping,
3360 struct writeback_control *wbc)
3361 {
3362 struct inode *inode = mapping->host;
3363
3364 return __f2fs_write_data_pages(mapping, wbc,
3365 F2FS_I(inode)->cp_task == current ?
3366 FS_CP_DATA_IO : FS_DATA_IO);
3367 }
3368
3369 void f2fs_write_failed(struct inode *inode, loff_t to)
3370 {
3371 loff_t i_size = i_size_read(inode);
3372
3373 if (IS_NOQUOTA(inode))
3374 return;
3375
3376 /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
3377 if (to > i_size && !f2fs_verity_in_progress(inode)) {
3378 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3379 filemap_invalidate_lock(inode->i_mapping);
3380
3381 truncate_pagecache(inode, i_size);
3382 f2fs_truncate_blocks(inode, i_size, true);
3383
3384 filemap_invalidate_unlock(inode->i_mapping);
3385 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3386 }
3387 }
3388
3389 static int prepare_write_begin(struct f2fs_sb_info *sbi,
3390 struct folio *folio, loff_t pos, unsigned int len,
3391 block_t *blk_addr, bool *node_changed)
3392 {
3393 struct inode *inode = folio->mapping->host;
3394 pgoff_t index = folio->index;
3395 struct dnode_of_data dn;
3396 struct page *ipage;
3397 bool locked = false;
3398 int flag = F2FS_GET_BLOCK_PRE_AIO;
3399 int err = 0;
3400
3401 /*
3402 * If a whole page is being written and we already preallocated all the
3403 * blocks, then there is no need to get a block address now.
3404 */
3405 if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
3406 return 0;
3407
3408 /* f2fs_lock_op avoids race between write CP and convert_inline_page */
3409 if (f2fs_has_inline_data(inode)) {
3410 if (pos + len > MAX_INLINE_DATA(inode))
3411 flag = F2FS_GET_BLOCK_DEFAULT;
3412 f2fs_map_lock(sbi, flag);
3413 locked = true;
3414 } else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
3415 f2fs_map_lock(sbi, flag);
3416 locked = true;
3417 }
3418
3419 restart:
3420 /* check inline_data */
3421 ipage = f2fs_get_node_page(sbi, inode->i_ino);
3422 if (IS_ERR(ipage)) {
3423 err = PTR_ERR(ipage);
3424 goto unlock_out;
3425 }
3426
3427 set_new_dnode(&dn, inode, ipage, ipage, 0);
3428
3429 if (f2fs_has_inline_data(inode)) {
3430 if (pos + len <= MAX_INLINE_DATA(inode)) {
3431 f2fs_do_read_inline_data(folio, ipage);
3432 set_inode_flag(inode, FI_DATA_EXIST);
3433 if (inode->i_nlink)
3434 set_page_private_inline(ipage);
3435 goto out;
3436 }
3437 err = f2fs_convert_inline_page(&dn, folio_page(folio, 0));
3438 if (err || dn.data_blkaddr != NULL_ADDR)
3439 goto out;
3440 }
3441
3442 if (!f2fs_lookup_read_extent_cache_block(inode, index,
3443 &dn.data_blkaddr)) {
3444 if (locked) {
3445 err = f2fs_reserve_block(&dn, index);
3446 goto out;
3447 }
3448
3449 /* hole case */
3450 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3451 if (!err && dn.data_blkaddr != NULL_ADDR)
3452 goto out;
3453 f2fs_put_dnode(&dn);
3454 f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
3455 WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
3456 locked = true;
3457 goto restart;
3458 }
3459 out:
3460 if (!err) {
3461 /* convert_inline_page can make node_changed */
3462 *blk_addr = dn.data_blkaddr;
3463 *node_changed = dn.node_changed;
3464 }
3465 f2fs_put_dnode(&dn);
3466 unlock_out:
3467 if (locked)
3468 f2fs_map_unlock(sbi, flag);
3469 return err;
3470 }
3471
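/*
 * Look up the on-disk block address of @index without allocating; lookup
 * failures are treated as holes (NULL_ADDR) rather than errors.
 */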
3472 static int __find_data_block(struct inode *inode, pgoff_t index,
3473 block_t *blk_addr)
3474 {
3475 struct dnode_of_data dn;
3476 struct page *ipage;
3477 int err = 0;
3478
3479 ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
3480 if (IS_ERR(ipage))
3481 return PTR_ERR(ipage);
3482
3483 set_new_dnode(&dn, inode, ipage, ipage, 0);
3484
3485 if (!f2fs_lookup_read_extent_cache_block(inode, index,
3486 &dn.data_blkaddr)) {
3487 /* hole case */
3488 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3489 if (err) {
3490 dn.data_blkaddr = NULL_ADDR;
3491 err = 0;
3492 }
3493 }
3494 *blk_addr = dn.data_blkaddr;
3495 f2fs_put_dnode(&dn);
3496 return err;
3497 }
3498
3499 static int __reserve_data_block(struct inode *inode, pgoff_t index,
3500 block_t *blk_addr, bool *node_changed)
3501 {
3502 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3503 struct dnode_of_data dn;
3504 struct page *ipage;
3505 int err = 0;
3506
3507 f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
3508
3509 ipage = f2fs_get_node_page(sbi, inode->i_ino);
3510 if (IS_ERR(ipage)) {
3511 err = PTR_ERR(ipage);
3512 goto unlock_out;
3513 }
3514 set_new_dnode(&dn, inode, ipage, ipage, 0);
3515
3516 if (!f2fs_lookup_read_extent_cache_block(dn.inode, index,
3517 &dn.data_blkaddr))
3518 err = f2fs_reserve_block(&dn, index);
3519
3520 *blk_addr = dn.data_blkaddr;
3521 *node_changed = dn.node_changed;
3522 f2fs_put_dnode(&dn);
3523
3524 unlock_out:
3525 f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);
3526 return err;
3527 }
3528
3529 static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
3530 struct folio *folio, loff_t pos, unsigned int len,
3531 block_t *blk_addr, bool *node_changed, bool *use_cow)
3532 {
3533 struct inode *inode = folio->mapping->host;
3534 struct inode *cow_inode = F2FS_I(inode)->cow_inode;
3535 pgoff_t index = folio->index;
3536 int err = 0;
3537 block_t ori_blk_addr = NULL_ADDR;
3538
3539 /* If pos is beyond the end of file, reserve a new block in COW inode */
3540 if ((pos & PAGE_MASK) >= i_size_read(inode))
3541 goto reserve_block;
3542
3543 /* Look for the block in COW inode first */
3544 err = __find_data_block(cow_inode, index, blk_addr);
3545 if (err) {
3546 return err;
3547 } else if (*blk_addr != NULL_ADDR) {
3548 *use_cow = true;
3549 return 0;
3550 }
3551
3552 if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE))
3553 goto reserve_block;
3554
3555 /* Look for the block in the original inode */
3556 err = __find_data_block(inode, index, &ori_blk_addr);
3557 if (err)
3558 return err;
3559
3560 reserve_block:
3561 /* Finally, we should reserve a new block in COW inode for the update */
3562 err = __reserve_data_block(cow_inode, index, blk_addr, node_changed);
3563 if (err)
3564 return err;
3565 inc_atomic_write_cnt(inode);
3566
3567 if (ori_blk_addr != NULL_ADDR)
3568 *blk_addr = ori_blk_addr;
3569 return 0;
3570 }
3571
3572 static int f2fs_write_begin(struct file *file, struct address_space *mapping,
3573 loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
3574 {
3575 struct inode *inode = mapping->host;
3576 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3577 struct folio *folio;
3578 pgoff_t index = pos >> PAGE_SHIFT;
3579 bool need_balance = false;
3580 bool use_cow = false;
3581 block_t blkaddr = NULL_ADDR;
3582 int err = 0;
3583
3584 trace_f2fs_write_begin(inode, pos, len);
3585
3586 if (!f2fs_is_checkpoint_ready(sbi)) {
3587 err = -ENOSPC;
3588 goto fail;
3589 }
3590
3591 /*
3592 * We should check this at this moment to avoid deadlock on inode page
3593 * and #0 page. The locking rule for inline_data conversion should be:
3594 * folio_lock(folio #0) -> folio_lock(inode_page)
3595 */
3596 if (index != 0) {
3597 err = f2fs_convert_inline_inode(inode);
3598 if (err)
3599 goto fail;
3600 }
3601
3602 #ifdef CONFIG_F2FS_FS_COMPRESSION
3603 if (f2fs_compressed_file(inode)) {
3604 int ret;
3605 struct page *page;
3606
3607 *fsdata = NULL;
3608
3609 if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
3610 goto repeat;
3611
3612 ret = f2fs_prepare_compress_overwrite(inode, &page,
3613 index, fsdata);
3614 if (ret < 0) {
3615 err = ret;
3616 goto fail;
3617 } else if (ret) {
3618 *foliop = page_folio(page);
3619 return 0;
3620 }
3621 }
3622 #endif
3623
3624 repeat:
3625 /*
3626 * Do not use FGP_STABLE to avoid deadlock.
3627 	 * We will wait for writeback below, under our own IO control.
3628 */
3629 folio = __filemap_get_folio(mapping, index,
3630 FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
3631 if (IS_ERR(folio)) {
3632 err = PTR_ERR(folio);
3633 goto fail;
3634 }
3635
3636 /* TODO: cluster can be compressed due to race with .writepage */
3637
3638 *foliop = folio;
3639
3640 if (f2fs_is_atomic_file(inode))
3641 err = prepare_atomic_write_begin(sbi, folio, pos, len,
3642 &blkaddr, &need_balance, &use_cow);
3643 else
3644 err = prepare_write_begin(sbi, folio, pos, len,
3645 &blkaddr, &need_balance);
3646 if (err)
3647 goto put_folio;
3648
3649 if (need_balance && !IS_NOQUOTA(inode) &&
3650 has_not_enough_free_secs(sbi, 0, 0)) {
3651 folio_unlock(folio);
3652 f2fs_balance_fs(sbi, true);
3653 folio_lock(folio);
3654 if (folio->mapping != mapping) {
3655 /* The folio got truncated from under us */
3656 folio_unlock(folio);
3657 folio_put(folio);
3658 goto repeat;
3659 }
3660 }
3661
3662 f2fs_wait_on_page_writeback(&folio->page, DATA, false, true);
3663
3664 if (len == folio_size(folio) || folio_test_uptodate(folio))
3665 return 0;
3666
3667 if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
3668 !f2fs_verity_in_progress(inode)) {
3669 folio_zero_segment(folio, len, folio_size(folio));
3670 return 0;
3671 }
3672
3673 if (blkaddr == NEW_ADDR) {
3674 folio_zero_segment(folio, 0, folio_size(folio));
3675 folio_mark_uptodate(folio);
3676 } else {
3677 if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
3678 DATA_GENERIC_ENHANCE_READ)) {
3679 err = -EFSCORRUPTED;
3680 goto put_folio;
3681 }
3682 err = f2fs_submit_page_read(use_cow ?
3683 F2FS_I(inode)->cow_inode : inode,
3684 folio, blkaddr, 0, true);
3685 if (err)
3686 goto put_folio;
3687
3688 folio_lock(folio);
3689 if (unlikely(folio->mapping != mapping)) {
3690 folio_unlock(folio);
3691 folio_put(folio);
3692 goto repeat;
3693 }
3694 if (unlikely(!folio_test_uptodate(folio))) {
3695 err = -EIO;
3696 goto put_folio;
3697 }
3698 }
3699 return 0;
3700
3701 put_folio:
3702 folio_unlock(folio);
3703 folio_put(folio);
3704 fail:
3705 f2fs_write_failed(inode, pos + len);
3706 return err;
3707 }
3708
3709 static int f2fs_write_end(struct file *file,
3710 struct address_space *mapping,
3711 loff_t pos, unsigned len, unsigned copied,
3712 struct folio *folio, void *fsdata)
3713 {
3714 struct inode *inode = folio->mapping->host;
3715
3716 trace_f2fs_write_end(inode, pos, len, copied);
3717
3718 /*
3719 	 * This should only come from len == PAGE_SIZE, so copied is expected
3720 	 * to be PAGE_SIZE as well. Otherwise, treat it as zero bytes copied and
3721 	 * let generic_perform_write() try to copy the data again via copied=0.
3722 */
3723 if (!folio_test_uptodate(folio)) {
3724 if (unlikely(copied != len))
3725 copied = 0;
3726 else
3727 folio_mark_uptodate(folio);
3728 }
3729
3730 #ifdef CONFIG_F2FS_FS_COMPRESSION
3731 /* overwrite compressed file */
3732 if (f2fs_compressed_file(inode) && fsdata) {
3733 f2fs_compress_write_end(inode, fsdata, folio->index, copied);
3734 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3735
3736 if (pos + copied > i_size_read(inode) &&
3737 !f2fs_verity_in_progress(inode))
3738 f2fs_i_size_write(inode, pos + copied);
3739 return copied;
3740 }
3741 #endif
3742
3743 if (!copied)
3744 goto unlock_out;
3745
3746 folio_mark_dirty(folio);
3747
3748 if (f2fs_is_atomic_file(inode))
3749 set_page_private_atomic(folio_page(folio, 0));
3750
3751 if (pos + copied > i_size_read(inode) &&
3752 !f2fs_verity_in_progress(inode)) {
3753 f2fs_i_size_write(inode, pos + copied);
3754 if (f2fs_is_atomic_file(inode))
3755 f2fs_i_size_write(F2FS_I(inode)->cow_inode,
3756 pos + copied);
3757 }
3758 unlock_out:
3759 folio_unlock(folio);
3760 folio_put(folio);
3761 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3762 return copied;
3763 }
3764
3765 void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
3766 {
3767 struct inode *inode = folio->mapping->host;
3768 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3769
3770 if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
3771 (offset || length != folio_size(folio)))
3772 return;
3773
3774 if (folio_test_dirty(folio)) {
3775 if (inode->i_ino == F2FS_META_INO(sbi)) {
3776 dec_page_count(sbi, F2FS_DIRTY_META);
3777 } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
3778 dec_page_count(sbi, F2FS_DIRTY_NODES);
3779 } else {
3780 inode_dec_dirty_pages(inode);
3781 f2fs_remove_dirty_inode(inode);
3782 }
3783 }
3784 clear_page_private_all(&folio->page);
3785 }
3786
3787 bool f2fs_release_folio(struct folio *folio, gfp_t wait)
3788 {
3789 	/* If this is a dirty folio, keep its private data */
3790 if (folio_test_dirty(folio))
3791 return false;
3792
3793 clear_page_private_all(&folio->page);
3794 return true;
3795 }
3796
3797 static bool f2fs_dirty_data_folio(struct address_space *mapping,
3798 struct folio *folio)
3799 {
3800 struct inode *inode = mapping->host;
3801
3802 trace_f2fs_set_page_dirty(folio, DATA);
3803
3804 if (!folio_test_uptodate(folio))
3805 folio_mark_uptodate(folio);
3806 BUG_ON(folio_test_swapcache(folio));
3807
3808 if (filemap_dirty_folio(mapping, folio)) {
3809 f2fs_update_dirty_folio(inode, folio);
3810 return true;
3811 }
3812 return false;
3813 }
3814
3815
3816 static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
3817 {
3818 #ifdef CONFIG_F2FS_FS_COMPRESSION
3819 struct dnode_of_data dn;
3820 sector_t start_idx, blknr = 0;
3821 int ret;
3822
3823 start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
3824
3825 set_new_dnode(&dn, inode, NULL, NULL, 0);
3826 ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
3827 if (ret)
3828 return 0;
3829
3830 if (dn.data_blkaddr != COMPRESS_ADDR) {
3831 dn.ofs_in_node += block - start_idx;
3832 blknr = f2fs_data_blkaddr(&dn);
3833 if (!__is_valid_data_blkaddr(blknr))
3834 blknr = 0;
3835 }
3836
3837 f2fs_put_dnode(&dn);
3838 return blknr;
3839 #else
3840 return 0;
3841 #endif
3842 }
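
/*
 * Note: for a compressed file, bmap can only report a physical block when
 * the cluster containing the block happens to be stored uncompressed (its
 * first slot is not COMPRESS_ADDR).  A genuinely compressed cluster has no
 * 1:1 logical-to-physical mapping, so 0 is returned for it, just as when
 * compression support is compiled out.
 */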
3843
3844
3845 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
3846 {
3847 struct inode *inode = mapping->host;
3848 sector_t blknr = 0;
3849
3850 if (f2fs_has_inline_data(inode))
3851 goto out;
3852
3853 /* flush dirty pages so that all blocks are allocated before the lookup */
3854 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
3855 filemap_write_and_wait(mapping);
3856
3857 /* the block number must be less than the file's maximum block count */
3858 if (unlikely(block >= max_file_blocks(inode)))
3859 goto out;
3860
3861 if (f2fs_compressed_file(inode)) {
3862 blknr = f2fs_bmap_compress(inode, block);
3863 } else {
3864 struct f2fs_map_blocks map;
3865
3866 memset(&map, 0, sizeof(map));
3867 map.m_lblk = block;
3868 map.m_len = 1;
3869 map.m_next_pgofs = NULL;
3870 map.m_seg_type = NO_CHECK_TYPE;
3871
3872 if (!f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_BMAP))
3873 blknr = map.m_pblk;
3874 }
3875 out:
3876 trace_f2fs_bmap(inode, block, blknr);
3877 return blknr;
3878 }
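
/*
 * For illustration: ->bmap is reached from the FIBMAP ioctl (which requires
 * CAP_SYS_RAWIO).  A userspace query looks roughly like this; the result is
 * in filesystem blocks and 0 means "hole or not mappable":
 *
 *	int blk = 0;			// logical block in, physical block out
 *	if (ioctl(fd, FIBMAP, &blk) == 0)
 *		printf("physical block %d\n", blk);
 */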
3879
3880 #ifdef CONFIG_SWAP
3881 static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
3882 unsigned int blkcnt)
3883 {
3884 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3885 unsigned int blkofs;
3886 unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
3887 unsigned int end_blk = start_blk + blkcnt - 1;
3888 unsigned int secidx = start_blk / blk_per_sec;
3889 unsigned int end_sec;
3890 int ret = 0;
3891
3892 if (!blkcnt)
3893 return 0;
3894 end_sec = end_blk / blk_per_sec;
3895
3896 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3897 filemap_invalidate_lock(inode->i_mapping);
3898
3899 set_inode_flag(inode, FI_ALIGNED_WRITE);
3900 set_inode_flag(inode, FI_OPU_WRITE);
3901
3902 for (; secidx <= end_sec; secidx++) {
3903 unsigned int blkofs_end = secidx == end_sec ?
3904 end_blk % blk_per_sec : blk_per_sec - 1;
3905
3906 f2fs_down_write(&sbi->pin_sem);
3907
3908 ret = f2fs_allocate_pinning_section(sbi);
3909 if (ret) {
3910 f2fs_up_write(&sbi->pin_sem);
3911 break;
3912 }
3913
3914 set_inode_flag(inode, FI_SKIP_WRITES);
3915
3916 for (blkofs = 0; blkofs <= blkofs_end; blkofs++) {
3917 struct page *page;
3918 unsigned int blkidx = secidx * blk_per_sec + blkofs;
3919
3920 page = f2fs_get_lock_data_page(inode, blkidx, true);
3921 if (IS_ERR(page)) {
3922 f2fs_up_write(&sbi->pin_sem);
3923 ret = PTR_ERR(page);
3924 goto done;
3925 }
3926
3927 set_page_dirty(page);
3928 f2fs_put_page(page, 1);
3929 }
3930
3931 clear_inode_flag(inode, FI_SKIP_WRITES);
3932
3933 ret = filemap_fdatawrite(inode->i_mapping);
3934
3935 f2fs_up_write(&sbi->pin_sem);
3936
3937 if (ret)
3938 break;
3939 }
3940
3941 done:
3942 clear_inode_flag(inode, FI_SKIP_WRITES);
3943 clear_inode_flag(inode, FI_OPU_WRITE);
3944 clear_inode_flag(inode, FI_ALIGNED_WRITE);
3945
3946 filemap_invalidate_unlock(inode->i_mapping);
3947 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3948
3949 return ret;
3950 }
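
/*
 * Note: this rewrites the given block range with out-of-place (OPU),
 * section-aligned writes: with FI_ALIGNED_WRITE/FI_OPU_WRITE set and a
 * pinning section allocated under pin_sem, every page in the range is
 * re-dirtied and written back, which relocates the data.  Swapfile
 * activation below uses it to fix up extents that are not section-aligned.
 */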
3951
3952 static int check_swap_activate(struct swap_info_struct *sis,
3953 struct file *swap_file, sector_t *span)
3954 {
3955 struct address_space *mapping = swap_file->f_mapping;
3956 struct inode *inode = mapping->host;
3957 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3958 block_t cur_lblock;
3959 block_t last_lblock;
3960 block_t pblock;
3961 block_t lowest_pblock = -1;
3962 block_t highest_pblock = 0;
3963 int nr_extents = 0;
3964 unsigned int nr_pblocks;
3965 unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
3966 unsigned int not_aligned = 0;
3967 int ret = 0;
3968
3969 /*
3970 * Map all the blocks into the extent list. This code doesn't try
3971 * to be very smart.
3972 */
3973 cur_lblock = 0;
3974 last_lblock = bytes_to_blks(inode, i_size_read(inode));
3975
3976 while (cur_lblock < last_lblock && cur_lblock < sis->max) {
3977 struct f2fs_map_blocks map;
3978 retry:
3979 cond_resched();
3980
3981 memset(&map, 0, sizeof(map));
3982 map.m_lblk = cur_lblock;
3983 map.m_len = last_lblock - cur_lblock;
3984 map.m_next_pgofs = NULL;
3985 map.m_next_extent = NULL;
3986 map.m_seg_type = NO_CHECK_TYPE;
3987 map.m_may_create = false;
3988
3989 ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
3990 if (ret)
3991 goto out;
3992
3993 /* hole */
3994 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
3995 f2fs_err(sbi, "Swapfile has holes");
3996 ret = -EINVAL;
3997 goto out;
3998 }
3999
4000 pblock = map.m_pblk;
4001 nr_pblocks = map.m_len;
4002
4003 if ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec ||
4004 nr_pblocks % blks_per_sec ||
4005 !f2fs_valid_pinned_area(sbi, pblock)) {
4006 bool last_extent = false;
4007
4008 not_aligned++;
4009
4010 nr_pblocks = roundup(nr_pblocks, blks_per_sec);
4011 if (cur_lblock + nr_pblocks > sis->max)
4012 nr_pblocks -= blks_per_sec;
4013
4014 /* this extent is the last one */
4015 if (!nr_pblocks) {
4016 nr_pblocks = last_lblock - cur_lblock;
4017 last_extent = true;
4018 }
4019
4020 ret = f2fs_migrate_blocks(inode, cur_lblock,
4021 nr_pblocks);
4022 if (ret) {
4023 if (ret == -ENOENT)
4024 ret = -EINVAL;
4025 goto out;
4026 }
4027
4028 if (!last_extent)
4029 goto retry;
4030 }
4031
4032 if (cur_lblock + nr_pblocks >= sis->max)
4033 nr_pblocks = sis->max - cur_lblock;
4034
4035 if (cur_lblock) { /* exclude the header page */
4036 if (pblock < lowest_pblock)
4037 lowest_pblock = pblock;
4038 if (pblock + nr_pblocks - 1 > highest_pblock)
4039 highest_pblock = pblock + nr_pblocks - 1;
4040 }
4041
4042 /*
4043 * We found a contiguous run of blocks; add it as one swap extent.
4044 */
4045 ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
4046 if (ret < 0)
4047 goto out;
4048 nr_extents += ret;
4049 cur_lblock += nr_pblocks;
4050 }
4051 ret = nr_extents;
4052 *span = 1 + highest_pblock - lowest_pblock;
4053 if (cur_lblock == 0)
4054 cur_lblock = 1; /* force Empty message */
4055 sis->max = cur_lblock;
4056 sis->pages = cur_lblock - 1;
4057 sis->highest_bit = cur_lblock - 1;
4058 out:
4059 if (not_aligned)
4060 f2fs_warn(sbi, "Swapfile (%u) is not aligned to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)",
4061 not_aligned, blks_per_sec * F2FS_BLKSIZE);
4062 return ret;
4063 }
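
/*
 * For illustration: this runs during swapon(2).  mm/swapfile.c's
 * setup_swap_extents() calls the address space's ->swap_activate(), and
 * every add_swap_extent() above registers one physically contiguous run so
 * that later swap I/O can go straight to the block device without
 * re-entering f2fs.  Simplified view of the caller:
 *
 *	ret = mapping->a_ops->swap_activate(sis, swap_file, &span);
 */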
4064
4065 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
4066 sector_t *span)
4067 {
4068 struct inode *inode = file_inode(file);
4069 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4070 int ret;
4071
4072 if (!S_ISREG(inode->i_mode))
4073 return -EINVAL;
4074
4075 if (f2fs_readonly(sbi->sb))
4076 return -EROFS;
4077
4078 if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) {
4079 f2fs_err(sbi, "Swapfile not supported in LFS mode");
4080 return -EINVAL;
4081 }
4082
4083 ret = f2fs_convert_inline_inode(inode);
4084 if (ret)
4085 return ret;
4086
4087 if (!f2fs_disable_compressed_file(inode))
4088 return -EINVAL;
4089
4090 ret = filemap_fdatawrite(inode->i_mapping);
4091 if (ret < 0)
4092 return ret;
4093
4094 f2fs_precache_extents(inode);
4095
4096 ret = check_swap_activate(sis, file, span);
4097 if (ret < 0)
4098 return ret;
4099
4100 stat_inc_swapfile_inode(inode);
4101 set_inode_flag(inode, FI_PIN_FILE);
4102 f2fs_update_time(sbi, REQ_TIME);
4103 return ret;
4104 }
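
/*
 * Note: before the extents are built, the file is normalized so that swap
 * I/O can safely bypass the filesystem: inline data is converted to a
 * regular block, compression is disabled and dirty pages are flushed; once
 * activation succeeds the inode is pinned (FI_PIN_FILE) so that garbage
 * collection will not move its blocks.
 */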
4105
4106 static void f2fs_swap_deactivate(struct file *file)
4107 {
4108 struct inode *inode = file_inode(file);
4109
4110 stat_dec_swapfile_inode(inode);
4111 clear_inode_flag(inode, FI_PIN_FILE);
4112 }
4113 #else
4114 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
4115 sector_t *span)
4116 {
4117 return -EOPNOTSUPP;
4118 }
4119
4120 static void f2fs_swap_deactivate(struct file *file)
4121 {
4122 }
4123 #endif
4124
4125 const struct address_space_operations f2fs_dblock_aops = {
4126 .read_folio = f2fs_read_data_folio,
4127 .readahead = f2fs_readahead,
4128 .writepage = f2fs_write_data_page,
4129 .writepages = f2fs_write_data_pages,
4130 .write_begin = f2fs_write_begin,
4131 .write_end = f2fs_write_end,
4132 .dirty_folio = f2fs_dirty_data_folio,
4133 .migrate_folio = filemap_migrate_folio,
4134 .invalidate_folio = f2fs_invalidate_folio,
4135 .release_folio = f2fs_release_folio,
4136 .bmap = f2fs_bmap,
4137 .swap_activate = f2fs_swap_activate,
4138 .swap_deactivate = f2fs_swap_deactivate,
4139 };
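
/*
 * Note: this table provides the address_space_operations for data inodes
 * (regular files, directories, symlinks); node and meta inodes use separate
 * operation tables defined in node.c and checkpoint.c.
 */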
4140
4141 void f2fs_clear_page_cache_dirty_tag(struct folio *folio)
4142 {
4143 struct address_space *mapping = folio->mapping;
4144 unsigned long flags;
4145
4146 xa_lock_irqsave(&mapping->i_pages, flags);
4147 __xa_clear_mark(&mapping->i_pages, folio->index,
4148 PAGECACHE_TAG_DIRTY);
4149 xa_unlock_irqrestore(&mapping->i_pages, flags);
4150 }
4151
4152 int __init f2fs_init_post_read_processing(void)
4153 {
4154 bio_post_read_ctx_cache =
4155 kmem_cache_create("f2fs_bio_post_read_ctx",
4156 sizeof(struct bio_post_read_ctx), 0, 0, NULL);
4157 if (!bio_post_read_ctx_cache)
4158 goto fail;
4159 bio_post_read_ctx_pool =
4160 mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
4161 bio_post_read_ctx_cache);
4162 if (!bio_post_read_ctx_pool)
4163 goto fail_free_cache;
4164 return 0;
4165
4166 fail_free_cache:
4167 kmem_cache_destroy(bio_post_read_ctx_cache);
4168 fail:
4169 return -ENOMEM;
4170 }
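
/*
 * Note: the mempool preallocates NUM_PREALLOC_POST_READ_CTXS contexts so
 * that read completions needing post-processing (decryption, decompression,
 * verity) can always make forward progress, even when regular slab
 * allocation would fail under memory pressure.
 */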
4171
4172 void f2fs_destroy_post_read_processing(void)
4173 {
4174 mempool_destroy(bio_post_read_ctx_pool);
4175 kmem_cache_destroy(bio_post_read_ctx_cache);
4176 }
4177
4178 int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
4179 {
4180 if (!f2fs_sb_has_encrypt(sbi) &&
4181 !f2fs_sb_has_verity(sbi) &&
4182 !f2fs_sb_has_compression(sbi))
4183 return 0;
4184
4185 sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
4186 WQ_UNBOUND | WQ_HIGHPRI,
4187 num_online_cpus());
4188 return sbi->post_read_wq ? 0 : -ENOMEM;
4189 }
4190
4191 void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
4192 {
4193 if (sbi->post_read_wq)
4194 destroy_workqueue(sbi->post_read_wq);
4195 }
4196
4197 int __init f2fs_init_bio_entry_cache(void)
4198 {
4199 bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
4200 sizeof(struct bio_entry));
4201 return bio_entry_slab ? 0 : -ENOMEM;
4202 }
4203
4204 void f2fs_destroy_bio_entry_cache(void)
4205 {
4206 kmem_cache_destroy(bio_entry_slab);
4207 }
4208
4209 static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
4210 unsigned int flags, struct iomap *iomap,
4211 struct iomap *srcmap)
4212 {
4213 struct f2fs_map_blocks map = {};
4214 pgoff_t next_pgofs = 0;
4215 int err;
4216
4217 map.m_lblk = bytes_to_blks(inode, offset);
4218 map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
4219 map.m_next_pgofs = &next_pgofs;
4220 map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
4221 inode->i_write_hint);
4222 if (flags & IOMAP_WRITE)
4223 map.m_may_create = true;
4224
4225 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO);
4226 if (err)
4227 return err;
4228
4229 iomap->offset = blks_to_bytes(inode, map.m_lblk);
4230
4231 /*
4232 * When inline encryption is enabled, sometimes I/O to an encrypted file
4233 * has to be broken up to guarantee DUN contiguity. Handle this by
4234 * limiting the length of the mapping returned.
4235 */
4236 map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
4237
4238 /*
4239 * We should never see delalloc or compressed extents here based on
4240 * prior flushing and checks.
4241 */
4242 if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
4243 return -EINVAL;
4244
4245 if (map.m_flags & F2FS_MAP_MAPPED) {
4246 if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR))
4247 return -EINVAL;
4248
4249 iomap->length = blks_to_bytes(inode, map.m_len);
4250 iomap->type = IOMAP_MAPPED;
4251 iomap->flags |= IOMAP_F_MERGED;
4252 iomap->bdev = map.m_bdev;
4253 iomap->addr = blks_to_bytes(inode, map.m_pblk);
4254 } else {
4255 if (flags & IOMAP_WRITE)
4256 return -ENOTBLK;
4257
4258 if (map.m_pblk == NULL_ADDR) {
4259 iomap->length = blks_to_bytes(inode, next_pgofs) -
4260 iomap->offset;
4261 iomap->type = IOMAP_HOLE;
4262 } else if (map.m_pblk == NEW_ADDR) {
4263 iomap->length = blks_to_bytes(inode, map.m_len);
4264 iomap->type = IOMAP_UNWRITTEN;
4265 } else {
4266 f2fs_bug_on(F2FS_I_SB(inode), 1);
4267 }
4268 iomap->addr = IOMAP_NULL_ADDR;
4269 }
4270
4271 if (map.m_flags & F2FS_MAP_NEW)
4272 iomap->flags |= IOMAP_F_NEW;
4273 if ((inode->i_state & I_DIRTY_DATASYNC) ||
4274 offset + length > i_size_read(inode))
4275 iomap->flags |= IOMAP_F_DIRTY;
4276
4277 return 0;
4278 }
4279
4280 const struct iomap_ops f2fs_iomap_ops = {
4281 .iomap_begin = f2fs_iomap_begin,
4282 };
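
/*
 * For illustration: f2fs_iomap_ops is consumed by the generic iomap
 * direct-I/O code; the DIO read/write entry points in file.c hand it to
 * iomap_dio_rw(), roughly (flags and the iomap_dio_ops argument simplified):
 *
 *	ret = iomap_dio_rw(iocb, iter, &f2fs_iomap_ops, &dio_ops, 0, NULL, 0);
 */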
4283