xref: /linux/fs/f2fs/data.c (revision ca55b2fef3a9373fcfc30f82fd26bc7fccbda732)
1 /*
2  * fs/f2fs/data.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/buffer_head.h>
14 #include <linux/mpage.h>
15 #include <linux/writeback.h>
16 #include <linux/backing-dev.h>
17 #include <linux/pagevec.h>
18 #include <linux/blkdev.h>
19 #include <linux/bio.h>
20 #include <linux/prefetch.h>
21 #include <linux/uio.h>
22 #include <linux/cleancache.h>
23 
24 #include "f2fs.h"
25 #include "node.h"
26 #include "segment.h"
27 #include "trace.h"
28 #include <trace/events/f2fs.h>
29 
30 static void f2fs_read_end_io(struct bio *bio)
31 {
32 	struct bio_vec *bvec;
33 	int i;
34 
35 	if (f2fs_bio_encrypted(bio)) {
36 		if (bio->bi_error) {
37 			f2fs_release_crypto_ctx(bio->bi_private);
38 		} else {
39 			f2fs_end_io_crypto_work(bio->bi_private, bio);
40 			return;
41 		}
42 	}
43 
44 	bio_for_each_segment_all(bvec, bio, i) {
45 		struct page *page = bvec->bv_page;
46 
47 		if (!bio->bi_error) {
48 			SetPageUptodate(page);
49 		} else {
50 			ClearPageUptodate(page);
51 			SetPageError(page);
52 		}
53 		unlock_page(page);
54 	}
55 	bio_put(bio);
56 }
57 
58 static void f2fs_write_end_io(struct bio *bio)
59 {
60 	struct f2fs_sb_info *sbi = bio->bi_private;
61 	struct bio_vec *bvec;
62 	int i;
63 
64 	bio_for_each_segment_all(bvec, bio, i) {
65 		struct page *page = bvec->bv_page;
66 
67 		f2fs_restore_and_release_control_page(&page);
68 
69 		if (unlikely(bio->bi_error)) {
70 			set_page_dirty(page);
71 			set_bit(AS_EIO, &page->mapping->flags);
72 			f2fs_stop_checkpoint(sbi);
73 		}
74 		end_page_writeback(page);
75 		dec_page_count(sbi, F2FS_WRITEBACK);
76 	}
77 
78 	if (!get_pages(sbi, F2FS_WRITEBACK) &&
79 			!list_empty(&sbi->cp_wait.task_list))
80 		wake_up(&sbi->cp_wait);
81 
82 	bio_put(bio);
83 }
84 
85 /*
86  * Low-level block read/write IO operations.
87  */
88 static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
89 				int npages, bool is_read)
90 {
91 	struct bio *bio;
92 
93 	bio = f2fs_bio_alloc(npages);
94 
95 	bio->bi_bdev = sbi->sb->s_bdev;
96 	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
97 	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
98 	bio->bi_private = is_read ? NULL : sbi;
99 
100 	return bio;
101 }
102 
103 static void __submit_merged_bio(struct f2fs_bio_info *io)
104 {
105 	struct f2fs_io_info *fio = &io->fio;
106 
107 	if (!io->bio)
108 		return;
109 
110 	if (is_read_io(fio->rw))
111 		trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
112 	else
113 		trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
114 
115 	submit_bio(fio->rw, io->bio);
116 	io->bio = NULL;
117 }
118 
119 void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
120 				enum page_type type, int rw)
121 {
122 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
123 	struct f2fs_bio_info *io;
124 
125 	io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
126 
127 	down_write(&io->io_rwsem);
128 
129 	/* change META to META_FLUSH in the checkpoint procedure */
130 	if (type >= META_FLUSH) {
131 		io->fio.type = META_FLUSH;
132 		if (test_opt(sbi, NOBARRIER))
133 			io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
134 		else
135 			io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
136 	}
137 	__submit_merged_bio(io);
138 	up_write(&io->io_rwsem);
139 }
140 
141 /*
142  * Fill the locked page with data located in the block address.
143  * Return unlocked page.
144  */
145 int f2fs_submit_page_bio(struct f2fs_io_info *fio)
146 {
147 	struct bio *bio;
148 	struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page;
149 
150 	trace_f2fs_submit_page_bio(page, fio);
151 	f2fs_trace_ios(fio, 0);
152 
153 	/* Allocate a new bio */
154 	bio = __bio_alloc(fio->sbi, fio->blk_addr, 1, is_read_io(fio->rw));
155 
156 	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
157 		bio_put(bio);
158 		return -EFAULT;
159 	}
160 
161 	submit_bio(fio->rw, bio);
162 	return 0;
163 }
164 
165 void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
166 {
167 	struct f2fs_sb_info *sbi = fio->sbi;
168 	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
169 	struct f2fs_bio_info *io;
170 	bool is_read = is_read_io(fio->rw);
171 	struct page *bio_page;
172 
173 	io = is_read ? &sbi->read_io : &sbi->write_io[btype];
174 
175 	verify_block_addr(sbi, fio->blk_addr);
176 
177 	down_write(&io->io_rwsem);
178 
179 	if (!is_read)
180 		inc_page_count(sbi, F2FS_WRITEBACK);
181 
182 	if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 ||
183 						io->fio.rw != fio->rw))
184 		__submit_merged_bio(io);
185 alloc_new:
186 	if (io->bio == NULL) {
187 		int bio_blocks = MAX_BIO_BLOCKS(sbi);
188 
189 		io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read);
190 		io->fio = *fio;
191 	}
192 
193 	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
194 
195 	if (bio_add_page(io->bio, bio_page, PAGE_CACHE_SIZE, 0) <
196 							PAGE_CACHE_SIZE) {
197 		__submit_merged_bio(io);
198 		goto alloc_new;
199 	}
200 
201 	io->last_block_in_bio = fio->blk_addr;
202 	f2fs_trace_ios(fio, 0);
203 
204 	up_write(&io->io_rwsem);
205 	trace_f2fs_submit_page_mbio(fio->page, fio);
206 }
207 
208 /*
209  * Lock ordering for the change of data block address:
210  * ->data_page
211  *  ->node_page
212  *    update block addresses in the node page
213  */
214 void set_data_blkaddr(struct dnode_of_data *dn)
215 {
216 	struct f2fs_node *rn;
217 	__le32 *addr_array;
218 	struct page *node_page = dn->node_page;
219 	unsigned int ofs_in_node = dn->ofs_in_node;
220 
221 	f2fs_wait_on_page_writeback(node_page, NODE);
222 
223 	rn = F2FS_NODE(node_page);
224 
225 	/* Get physical address of data block */
226 	addr_array = blkaddr_in_node(rn);
227 	addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
228 	set_page_dirty(node_page);
229 }
230 
231 int reserve_new_block(struct dnode_of_data *dn)
232 {
233 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
234 
235 	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
236 		return -EPERM;
237 	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
238 		return -ENOSPC;
239 
240 	trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
241 
242 	dn->data_blkaddr = NEW_ADDR;
243 	set_data_blkaddr(dn);
244 	mark_inode_dirty(dn->inode);
245 	sync_inode_page(dn);
246 	return 0;
247 }
248 
249 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
250 {
251 	bool need_put = dn->inode_page ? false : true;
252 	int err;
253 
254 	err = get_dnode_of_data(dn, index, ALLOC_NODE);
255 	if (err)
256 		return err;
257 
258 	if (dn->data_blkaddr == NULL_ADDR)
259 		err = reserve_new_block(dn);
260 	if (err || need_put)
261 		f2fs_put_dnode(dn);
262 	return err;
263 }
264 
265 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
266 {
267 	struct extent_info ei;
268 	struct inode *inode = dn->inode;
269 
270 	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
271 		dn->data_blkaddr = ei.blk + index - ei.fofs;
272 		return 0;
273 	}
274 
275 	return f2fs_reserve_block(dn, index);
276 }
277 
278 struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)
279 {
280 	struct address_space *mapping = inode->i_mapping;
281 	struct dnode_of_data dn;
282 	struct page *page;
283 	struct extent_info ei;
284 	int err;
285 	struct f2fs_io_info fio = {
286 		.sbi = F2FS_I_SB(inode),
287 		.type = DATA,
288 		.rw = rw,
289 		.encrypted_page = NULL,
290 	};
291 
292 	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
293 		return read_mapping_page(mapping, index, NULL);
294 
295 	page = grab_cache_page(mapping, index);
296 	if (!page)
297 		return ERR_PTR(-ENOMEM);
298 
299 	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
300 		dn.data_blkaddr = ei.blk + index - ei.fofs;
301 		goto got_it;
302 	}
303 
304 	set_new_dnode(&dn, inode, NULL, NULL, 0);
305 	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
306 	if (err)
307 		goto put_err;
308 	f2fs_put_dnode(&dn);
309 
310 	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
311 		err = -ENOENT;
312 		goto put_err;
313 	}
314 got_it:
315 	if (PageUptodate(page)) {
316 		unlock_page(page);
317 		return page;
318 	}
319 
320 	/*
321 	 * A new dentry page is allocated but not able to be written, since its
322 	 * new inode page couldn't be allocated due to -ENOSPC.
323 	 * In such the case, its blkaddr can be remained as NEW_ADDR.
324 	 * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
325 	 */
326 	if (dn.data_blkaddr == NEW_ADDR) {
327 		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
328 		SetPageUptodate(page);
329 		unlock_page(page);
330 		return page;
331 	}
332 
333 	fio.blk_addr = dn.data_blkaddr;
334 	fio.page = page;
335 	err = f2fs_submit_page_bio(&fio);
336 	if (err)
337 		goto put_err;
338 	return page;
339 
340 put_err:
341 	f2fs_put_page(page, 1);
342 	return ERR_PTR(err);
343 }
344 
345 struct page *find_data_page(struct inode *inode, pgoff_t index)
346 {
347 	struct address_space *mapping = inode->i_mapping;
348 	struct page *page;
349 
350 	page = find_get_page(mapping, index);
351 	if (page && PageUptodate(page))
352 		return page;
353 	f2fs_put_page(page, 0);
354 
355 	page = get_read_data_page(inode, index, READ_SYNC);
356 	if (IS_ERR(page))
357 		return page;
358 
359 	if (PageUptodate(page))
360 		return page;
361 
362 	wait_on_page_locked(page);
363 	if (unlikely(!PageUptodate(page))) {
364 		f2fs_put_page(page, 0);
365 		return ERR_PTR(-EIO);
366 	}
367 	return page;
368 }
369 
370 /*
371  * If it tries to access a hole, return an error.
372  * Because, the callers, functions in dir.c and GC, should be able to know
373  * whether this page exists or not.
374  */
375 struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
376 {
377 	struct address_space *mapping = inode->i_mapping;
378 	struct page *page;
379 repeat:
380 	page = get_read_data_page(inode, index, READ_SYNC);
381 	if (IS_ERR(page))
382 		return page;
383 
384 	/* wait for read completion */
385 	lock_page(page);
386 	if (unlikely(!PageUptodate(page))) {
387 		f2fs_put_page(page, 1);
388 		return ERR_PTR(-EIO);
389 	}
390 	if (unlikely(page->mapping != mapping)) {
391 		f2fs_put_page(page, 1);
392 		goto repeat;
393 	}
394 	return page;
395 }
396 
397 /*
398  * Caller ensures that this data page is never allocated.
399  * A new zero-filled data page is allocated in the page cache.
400  *
401  * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
402  * f2fs_unlock_op().
403  * Note that, ipage is set only by make_empty_dir, and if any error occur,
404  * ipage should be released by this function.
405  */
406 struct page *get_new_data_page(struct inode *inode,
407 		struct page *ipage, pgoff_t index, bool new_i_size)
408 {
409 	struct address_space *mapping = inode->i_mapping;
410 	struct page *page;
411 	struct dnode_of_data dn;
412 	int err;
413 repeat:
414 	page = grab_cache_page(mapping, index);
415 	if (!page) {
416 		/*
417 		 * before exiting, we should make sure ipage will be released
418 		 * if any error occur.
419 		 */
420 		f2fs_put_page(ipage, 1);
421 		return ERR_PTR(-ENOMEM);
422 	}
423 
424 	set_new_dnode(&dn, inode, ipage, NULL, 0);
425 	err = f2fs_reserve_block(&dn, index);
426 	if (err) {
427 		f2fs_put_page(page, 1);
428 		return ERR_PTR(err);
429 	}
430 	if (!ipage)
431 		f2fs_put_dnode(&dn);
432 
433 	if (PageUptodate(page))
434 		goto got_it;
435 
436 	if (dn.data_blkaddr == NEW_ADDR) {
437 		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
438 		SetPageUptodate(page);
439 	} else {
440 		f2fs_put_page(page, 1);
441 
442 		page = get_read_data_page(inode, index, READ_SYNC);
443 		if (IS_ERR(page))
444 			goto repeat;
445 
446 		/* wait for read completion */
447 		lock_page(page);
448 	}
449 got_it:
450 	if (new_i_size &&
451 		i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
452 		i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
453 		/* Only the directory inode sets new_i_size */
454 		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
455 	}
456 	return page;
457 }
458 
459 static int __allocate_data_block(struct dnode_of_data *dn)
460 {
461 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
462 	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
463 	struct f2fs_summary sum;
464 	struct node_info ni;
465 	int seg = CURSEG_WARM_DATA;
466 	pgoff_t fofs;
467 
468 	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
469 		return -EPERM;
470 
471 	dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
472 	if (dn->data_blkaddr == NEW_ADDR)
473 		goto alloc;
474 
475 	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
476 		return -ENOSPC;
477 
478 alloc:
479 	get_node_info(sbi, dn->nid, &ni);
480 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
481 
482 	if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
483 		seg = CURSEG_DIRECT_IO;
484 
485 	allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
486 								&sum, seg);
487 	set_data_blkaddr(dn);
488 
489 	/* update i_size */
490 	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
491 							dn->ofs_in_node;
492 	if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT))
493 		i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT));
494 
495 	/* direct IO doesn't use extent cache to maximize the performance */
496 	f2fs_drop_largest_extent(dn->inode, fofs);
497 
498 	return 0;
499 }
500 
501 static void __allocate_data_blocks(struct inode *inode, loff_t offset,
502 							size_t count)
503 {
504 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
505 	struct dnode_of_data dn;
506 	u64 start = F2FS_BYTES_TO_BLK(offset);
507 	u64 len = F2FS_BYTES_TO_BLK(count);
508 	bool allocated;
509 	u64 end_offset;
510 
511 	while (len) {
512 		f2fs_balance_fs(sbi);
513 		f2fs_lock_op(sbi);
514 
515 		/* When reading holes, we need its node page */
516 		set_new_dnode(&dn, inode, NULL, NULL, 0);
517 		if (get_dnode_of_data(&dn, start, ALLOC_NODE))
518 			goto out;
519 
520 		allocated = false;
521 		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
522 
523 		while (dn.ofs_in_node < end_offset && len) {
524 			block_t blkaddr;
525 
526 			blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
527 			if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
528 				if (__allocate_data_block(&dn))
529 					goto sync_out;
530 				allocated = true;
531 			}
532 			len--;
533 			start++;
534 			dn.ofs_in_node++;
535 		}
536 
537 		if (allocated)
538 			sync_inode_page(&dn);
539 
540 		f2fs_put_dnode(&dn);
541 		f2fs_unlock_op(sbi);
542 	}
543 	return;
544 
545 sync_out:
546 	if (allocated)
547 		sync_inode_page(&dn);
548 	f2fs_put_dnode(&dn);
549 out:
550 	f2fs_unlock_op(sbi);
551 	return;
552 }
553 
/*
 * f2fs_map_blocks() supports readahead/bmap/rw direct_IO through the
 * f2fs_map_blocks structure.
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
 *
 * On entry map->m_lblk is the first file block to map and map->m_len the
 * maximum number of blocks wanted.  On return map->m_pblk, map->m_len and
 * map->m_flags describe the run that was found; m_len and m_flags are left
 * at 0 when nothing was mapped.
 *
 * @create: when non-zero, missing blocks are allocated and the lookup runs
 *          between f2fs_lock_op() and f2fs_unlock_op().
 * @flag:   one of the F2FS_GET_BLOCK_* values; it decides how NEW_ADDR
 *          blocks and holes are reported.
 *
 * Returns 0 or a negative errno.  -ENOENT from the dnode lookup is turned
 * into 0; -ENOENT is returned only for F2FS_GET_BLOCK_BMAP.
 */
static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
						int create, int flag)
{
	unsigned int maxblocks = map->m_len;
	struct dnode_of_data dn;
	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
	pgoff_t pgofs, end_offset;
	int err = 0, ofs = 1;
	struct extent_info ei;
	bool allocated = false;

	map->m_len = 0;
	map->m_flags = 0;

	/* it only supports block size == page size */
	pgofs =	(pgoff_t)map->m_lblk;

	/* fast path: answer from the extent cache, clamped to the request */
	if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
		map->m_pblk = ei.blk + pgofs - ei.fofs;
		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
		map->m_flags = F2FS_MAP_MAPPED;
		goto out;
	}

	if (create)
		f2fs_lock_op(F2FS_I_SB(inode));

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, pgofs, mode);
	if (err) {
		/* a missing node is reported as "nothing mapped", not an error */
		if (err == -ENOENT)
			err = 0;
		goto unlock_out;
	}
	if (dn.data_blkaddr == NEW_ADDR) {
		if (flag == F2FS_GET_BLOCK_BMAP) {
			err = -ENOENT;
			goto put_out;
		} else if (flag == F2FS_GET_BLOCK_READ ||
				flag == F2FS_GET_BLOCK_DIO) {
			goto put_out;
		}
		/*
		 * if it is in fiemap call path (flag = F2FS_GET_BLOCK_FIEMAP),
		 * mark it as mapped and unwritten block.
		 */
	}

	if (dn.data_blkaddr != NULL_ADDR) {
		map->m_flags = F2FS_MAP_MAPPED;
		map->m_pblk = dn.data_blkaddr;
		if (dn.data_blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_UNWRITTEN;
	} else if (create) {
		err = __allocate_data_block(&dn);
		if (err)
			goto put_out;
		allocated = true;
		map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED;
		map->m_pblk = dn.data_blkaddr;
	} else {
		/* hole and no allocation requested */
		if (flag == F2FS_GET_BLOCK_BMAP)
			err = -ENOENT;
		goto put_out;
	}

	/* the first block is mapped; now try to extend the run */
	end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
	map->m_len = 1;
	dn.ofs_in_node++;
	pgofs++;

get_next:
	/* ran off the end of this node page: move on to the next dnode */
	if (dn.ofs_in_node >= end_offset) {
		if (allocated)
			sync_inode_page(&dn);
		allocated = false;
		f2fs_put_dnode(&dn);

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = get_dnode_of_data(&dn, pgofs, mode);
		if (err) {
			if (err == -ENOENT)
				err = 0;
			/* the old dnode was already put above */
			goto unlock_out;
		}

		if (dn.data_blkaddr == NEW_ADDR &&
				flag != F2FS_GET_BLOCK_FIEMAP)
			goto put_out;

		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
	}

	if (maxblocks > map->m_len) {
		block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
		if (blkaddr == NULL_ADDR && create) {
			err = __allocate_data_block(&dn);
			if (err)
				goto sync_out;
			allocated = true;
			map->m_flags |= F2FS_MAP_NEW;
			blkaddr = dn.data_blkaddr;
		}
		/*
		 * Give more consecutive addresses for the readahead: the run
		 * grows only while blocks are physically contiguous, or while
		 * both the run and the next block are NEW_ADDR.
		 */
		if ((map->m_pblk != NEW_ADDR &&
				blkaddr == (map->m_pblk + ofs)) ||
				(map->m_pblk == NEW_ADDR &&
				blkaddr == NEW_ADDR)) {
			ofs++;
			dn.ofs_in_node++;
			pgofs++;
			map->m_len++;
			goto get_next;
		}
	}
sync_out:
	if (allocated)
		sync_inode_page(&dn);
put_out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (create)
		f2fs_unlock_op(F2FS_I_SB(inode));
out:
	trace_f2fs_map_blocks(inode, map, err);
	return err;
}
691 
692 static int __get_data_block(struct inode *inode, sector_t iblock,
693 			struct buffer_head *bh, int create, int flag)
694 {
695 	struct f2fs_map_blocks map;
696 	int ret;
697 
698 	map.m_lblk = iblock;
699 	map.m_len = bh->b_size >> inode->i_blkbits;
700 
701 	ret = f2fs_map_blocks(inode, &map, create, flag);
702 	if (!ret) {
703 		map_bh(bh, inode->i_sb, map.m_pblk);
704 		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
705 		bh->b_size = map.m_len << inode->i_blkbits;
706 	}
707 	return ret;
708 }
709 
710 static int get_data_block(struct inode *inode, sector_t iblock,
711 			struct buffer_head *bh_result, int create, int flag)
712 {
713 	return __get_data_block(inode, iblock, bh_result, create, flag);
714 }
715 
716 static int get_data_block_dio(struct inode *inode, sector_t iblock,
717 			struct buffer_head *bh_result, int create)
718 {
719 	return __get_data_block(inode, iblock, bh_result, create,
720 						F2FS_GET_BLOCK_DIO);
721 }
722 
723 static int get_data_block_bmap(struct inode *inode, sector_t iblock,
724 			struct buffer_head *bh_result, int create)
725 {
726 	return __get_data_block(inode, iblock, bh_result, create,
727 						F2FS_GET_BLOCK_BMAP);
728 }
729 
730 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
731 {
732 	return (offset >> inode->i_blkbits);
733 }
734 
735 static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
736 {
737 	return (blk << inode->i_blkbits);
738 }
739 
740 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
741 		u64 start, u64 len)
742 {
743 	struct buffer_head map_bh;
744 	sector_t start_blk, last_blk;
745 	loff_t isize = i_size_read(inode);
746 	u64 logical = 0, phys = 0, size = 0;
747 	u32 flags = 0;
748 	bool past_eof = false, whole_file = false;
749 	int ret = 0;
750 
751 	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
752 	if (ret)
753 		return ret;
754 
755 	mutex_lock(&inode->i_mutex);
756 
757 	if (len >= isize) {
758 		whole_file = true;
759 		len = isize;
760 	}
761 
762 	if (logical_to_blk(inode, len) == 0)
763 		len = blk_to_logical(inode, 1);
764 
765 	start_blk = logical_to_blk(inode, start);
766 	last_blk = logical_to_blk(inode, start + len - 1);
767 next:
768 	memset(&map_bh, 0, sizeof(struct buffer_head));
769 	map_bh.b_size = len;
770 
771 	ret = get_data_block(inode, start_blk, &map_bh, 0,
772 					F2FS_GET_BLOCK_FIEMAP);
773 	if (ret)
774 		goto out;
775 
776 	/* HOLE */
777 	if (!buffer_mapped(&map_bh)) {
778 		start_blk++;
779 
780 		if (!past_eof && blk_to_logical(inode, start_blk) >= isize)
781 			past_eof = 1;
782 
783 		if (past_eof && size) {
784 			flags |= FIEMAP_EXTENT_LAST;
785 			ret = fiemap_fill_next_extent(fieinfo, logical,
786 					phys, size, flags);
787 		} else if (size) {
788 			ret = fiemap_fill_next_extent(fieinfo, logical,
789 					phys, size, flags);
790 			size = 0;
791 		}
792 
793 		/* if we have holes up to/past EOF then we're done */
794 		if (start_blk > last_blk || past_eof || ret)
795 			goto out;
796 	} else {
797 		if (start_blk > last_blk && !whole_file) {
798 			ret = fiemap_fill_next_extent(fieinfo, logical,
799 					phys, size, flags);
800 			goto out;
801 		}
802 
803 		/*
804 		 * if size != 0 then we know we already have an extent
805 		 * to add, so add it.
806 		 */
807 		if (size) {
808 			ret = fiemap_fill_next_extent(fieinfo, logical,
809 					phys, size, flags);
810 			if (ret)
811 				goto out;
812 		}
813 
814 		logical = blk_to_logical(inode, start_blk);
815 		phys = blk_to_logical(inode, map_bh.b_blocknr);
816 		size = map_bh.b_size;
817 		flags = 0;
818 		if (buffer_unwritten(&map_bh))
819 			flags = FIEMAP_EXTENT_UNWRITTEN;
820 
821 		start_blk += logical_to_blk(inode, size);
822 
823 		/*
824 		 * If we are past the EOF, then we need to make sure as
825 		 * soon as we find a hole that the last extent we found
826 		 * is marked with FIEMAP_EXTENT_LAST
827 		 */
828 		if (!past_eof && logical + size >= isize)
829 			past_eof = true;
830 	}
831 	cond_resched();
832 	if (fatal_signal_pending(current))
833 		ret = -EINTR;
834 	else
835 		goto next;
836 out:
837 	if (ret == 1)
838 		ret = 0;
839 
840 	mutex_unlock(&inode->i_mutex);
841 	return ret;
842 }
843 
844 /*
845  * This function was originally taken from fs/mpage.c, and customized for f2fs.
846  * Major change was from block_size == page_size in f2fs by default.
847  */
848 static int f2fs_mpage_readpages(struct address_space *mapping,
849 			struct list_head *pages, struct page *page,
850 			unsigned nr_pages)
851 {
852 	struct bio *bio = NULL;
853 	unsigned page_idx;
854 	sector_t last_block_in_bio = 0;
855 	struct inode *inode = mapping->host;
856 	const unsigned blkbits = inode->i_blkbits;
857 	const unsigned blocksize = 1 << blkbits;
858 	sector_t block_in_file;
859 	sector_t last_block;
860 	sector_t last_block_in_file;
861 	sector_t block_nr;
862 	struct block_device *bdev = inode->i_sb->s_bdev;
863 	struct f2fs_map_blocks map;
864 
865 	map.m_pblk = 0;
866 	map.m_lblk = 0;
867 	map.m_len = 0;
868 	map.m_flags = 0;
869 
870 	for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
871 
872 		prefetchw(&page->flags);
873 		if (pages) {
874 			page = list_entry(pages->prev, struct page, lru);
875 			list_del(&page->lru);
876 			if (add_to_page_cache_lru(page, mapping,
877 						  page->index, GFP_KERNEL))
878 				goto next_page;
879 		}
880 
881 		block_in_file = (sector_t)page->index;
882 		last_block = block_in_file + nr_pages;
883 		last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
884 								blkbits;
885 		if (last_block > last_block_in_file)
886 			last_block = last_block_in_file;
887 
888 		/*
889 		 * Map blocks using the previous result first.
890 		 */
891 		if ((map.m_flags & F2FS_MAP_MAPPED) &&
892 				block_in_file > map.m_lblk &&
893 				block_in_file < (map.m_lblk + map.m_len))
894 			goto got_it;
895 
896 		/*
897 		 * Then do more f2fs_map_blocks() calls until we are
898 		 * done with this page.
899 		 */
900 		map.m_flags = 0;
901 
902 		if (block_in_file < last_block) {
903 			map.m_lblk = block_in_file;
904 			map.m_len = last_block - block_in_file;
905 
906 			if (f2fs_map_blocks(inode, &map, 0, false))
907 				goto set_error_page;
908 		}
909 got_it:
910 		if ((map.m_flags & F2FS_MAP_MAPPED)) {
911 			block_nr = map.m_pblk + block_in_file - map.m_lblk;
912 			SetPageMappedToDisk(page);
913 
914 			if (!PageUptodate(page) && !cleancache_get_page(page)) {
915 				SetPageUptodate(page);
916 				goto confused;
917 			}
918 		} else {
919 			zero_user_segment(page, 0, PAGE_CACHE_SIZE);
920 			SetPageUptodate(page);
921 			unlock_page(page);
922 			goto next_page;
923 		}
924 
925 		/*
926 		 * This page will go to BIO.  Do we need to send this
927 		 * BIO off first?
928 		 */
929 		if (bio && (last_block_in_bio != block_nr - 1)) {
930 submit_and_realloc:
931 			submit_bio(READ, bio);
932 			bio = NULL;
933 		}
934 		if (bio == NULL) {
935 			struct f2fs_crypto_ctx *ctx = NULL;
936 
937 			if (f2fs_encrypted_inode(inode) &&
938 					S_ISREG(inode->i_mode)) {
939 				struct page *cpage;
940 
941 				ctx = f2fs_get_crypto_ctx(inode);
942 				if (IS_ERR(ctx))
943 					goto set_error_page;
944 
945 				/* wait the page to be moved by cleaning */
946 				cpage = find_lock_page(
947 						META_MAPPING(F2FS_I_SB(inode)),
948 						block_nr);
949 				if (cpage) {
950 					f2fs_wait_on_page_writeback(cpage,
951 									DATA);
952 					f2fs_put_page(cpage, 1);
953 				}
954 			}
955 
956 			bio = bio_alloc(GFP_KERNEL,
957 				min_t(int, nr_pages, BIO_MAX_PAGES));
958 			if (!bio) {
959 				if (ctx)
960 					f2fs_release_crypto_ctx(ctx);
961 				goto set_error_page;
962 			}
963 			bio->bi_bdev = bdev;
964 			bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr);
965 			bio->bi_end_io = f2fs_read_end_io;
966 			bio->bi_private = ctx;
967 		}
968 
969 		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
970 			goto submit_and_realloc;
971 
972 		last_block_in_bio = block_nr;
973 		goto next_page;
974 set_error_page:
975 		SetPageError(page);
976 		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
977 		unlock_page(page);
978 		goto next_page;
979 confused:
980 		if (bio) {
981 			submit_bio(READ, bio);
982 			bio = NULL;
983 		}
984 		unlock_page(page);
985 next_page:
986 		if (pages)
987 			page_cache_release(page);
988 	}
989 	BUG_ON(pages && !list_empty(pages));
990 	if (bio)
991 		submit_bio(READ, bio);
992 	return 0;
993 }
994 
995 static int f2fs_read_data_page(struct file *file, struct page *page)
996 {
997 	struct inode *inode = page->mapping->host;
998 	int ret = -EAGAIN;
999 
1000 	trace_f2fs_readpage(page, DATA);
1001 
1002 	/* If the file has inline data, try to read it directly */
1003 	if (f2fs_has_inline_data(inode))
1004 		ret = f2fs_read_inline_data(inode, page);
1005 	if (ret == -EAGAIN)
1006 		ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
1007 	return ret;
1008 }
1009 
1010 static int f2fs_read_data_pages(struct file *file,
1011 			struct address_space *mapping,
1012 			struct list_head *pages, unsigned nr_pages)
1013 {
1014 	struct inode *inode = file->f_mapping->host;
1015 
1016 	/* If the file has inline data, skip readpages */
1017 	if (f2fs_has_inline_data(inode))
1018 		return 0;
1019 
1020 	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
1021 }
1022 
1023 int do_write_data_page(struct f2fs_io_info *fio)
1024 {
1025 	struct page *page = fio->page;
1026 	struct inode *inode = page->mapping->host;
1027 	struct dnode_of_data dn;
1028 	int err = 0;
1029 
1030 	set_new_dnode(&dn, inode, NULL, NULL, 0);
1031 	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
1032 	if (err)
1033 		return err;
1034 
1035 	fio->blk_addr = dn.data_blkaddr;
1036 
1037 	/* This page is already truncated */
1038 	if (fio->blk_addr == NULL_ADDR) {
1039 		ClearPageUptodate(page);
1040 		goto out_writepage;
1041 	}
1042 
1043 	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
1044 		fio->encrypted_page = f2fs_encrypt(inode, fio->page);
1045 		if (IS_ERR(fio->encrypted_page)) {
1046 			err = PTR_ERR(fio->encrypted_page);
1047 			goto out_writepage;
1048 		}
1049 	}
1050 
1051 	set_page_writeback(page);
1052 
1053 	/*
1054 	 * If current allocation needs SSR,
1055 	 * it had better in-place writes for updated data.
1056 	 */
1057 	if (unlikely(fio->blk_addr != NEW_ADDR &&
1058 			!is_cold_data(page) &&
1059 			need_inplace_update(inode))) {
1060 		rewrite_data_page(fio);
1061 		set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
1062 		trace_f2fs_do_write_data_page(page, IPU);
1063 	} else {
1064 		write_data_page(&dn, fio);
1065 		set_data_blkaddr(&dn);
1066 		f2fs_update_extent_cache(&dn);
1067 		trace_f2fs_do_write_data_page(page, OPU);
1068 		set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
1069 		if (page->index == 0)
1070 			set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
1071 	}
1072 out_writepage:
1073 	f2fs_put_dnode(&dn);
1074 	return err;
1075 }
1076 
1077 static int f2fs_write_data_page(struct page *page,
1078 					struct writeback_control *wbc)
1079 {
1080 	struct inode *inode = page->mapping->host;
1081 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1082 	loff_t i_size = i_size_read(inode);
1083 	const pgoff_t end_index = ((unsigned long long) i_size)
1084 							>> PAGE_CACHE_SHIFT;
1085 	unsigned offset = 0;
1086 	bool need_balance_fs = false;
1087 	int err = 0;
1088 	struct f2fs_io_info fio = {
1089 		.sbi = sbi,
1090 		.type = DATA,
1091 		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
1092 		.page = page,
1093 		.encrypted_page = NULL,
1094 	};
1095 
1096 	trace_f2fs_writepage(page, DATA);
1097 
1098 	if (page->index < end_index)
1099 		goto write;
1100 
1101 	/*
1102 	 * If the offset is out-of-range of file size,
1103 	 * this page does not have to be written to disk.
1104 	 */
1105 	offset = i_size & (PAGE_CACHE_SIZE - 1);
1106 	if ((page->index >= end_index + 1) || !offset)
1107 		goto out;
1108 
1109 	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
1110 write:
1111 	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1112 		goto redirty_out;
1113 	if (f2fs_is_drop_cache(inode))
1114 		goto out;
1115 	if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim &&
1116 			available_free_memory(sbi, BASE_CHECK))
1117 		goto redirty_out;
1118 
1119 	/* Dentry blocks are controlled by checkpoint */
1120 	if (S_ISDIR(inode->i_mode)) {
1121 		if (unlikely(f2fs_cp_error(sbi)))
1122 			goto redirty_out;
1123 		err = do_write_data_page(&fio);
1124 		goto done;
1125 	}
1126 
1127 	/* we should bypass data pages to proceed the kworkder jobs */
1128 	if (unlikely(f2fs_cp_error(sbi))) {
1129 		SetPageError(page);
1130 		goto out;
1131 	}
1132 
1133 	if (!wbc->for_reclaim)
1134 		need_balance_fs = true;
1135 	else if (has_not_enough_free_secs(sbi, 0))
1136 		goto redirty_out;
1137 
1138 	err = -EAGAIN;
1139 	f2fs_lock_op(sbi);
1140 	if (f2fs_has_inline_data(inode))
1141 		err = f2fs_write_inline_data(inode, page);
1142 	if (err == -EAGAIN)
1143 		err = do_write_data_page(&fio);
1144 	f2fs_unlock_op(sbi);
1145 done:
1146 	if (err && err != -ENOENT)
1147 		goto redirty_out;
1148 
1149 	clear_cold_data(page);
1150 out:
1151 	inode_dec_dirty_pages(inode);
1152 	if (err)
1153 		ClearPageUptodate(page);
1154 	unlock_page(page);
1155 	if (need_balance_fs)
1156 		f2fs_balance_fs(sbi);
1157 	if (wbc->for_reclaim)
1158 		f2fs_submit_merged_bio(sbi, DATA, WRITE);
1159 	return 0;
1160 
1161 redirty_out:
1162 	redirty_page_for_writepage(wbc, page);
1163 	return AOP_WRITEPAGE_ACTIVATE;
1164 }
1165 
1166 static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
1167 			void *data)
1168 {
1169 	struct address_space *mapping = data;
1170 	int ret = mapping->a_ops->writepage(page, wbc);
1171 	mapping_set_error(mapping, ret);
1172 	return ret;
1173 }
1174 
/*
 * This function was copied from write_cache_pages from mm/page-writeback.c.
 * The major change is making write step of cold data page separately from
 * warm/hot data page.
 *
 * The tagged-page walk is executed twice, selected by "step":
 *   step 0 skips every page for which is_cold_data() returns 0,
 *   step 1 skips every page for which is_cold_data() returns 1.
 * "done" is not reset between the two passes, so once the first pass has
 * set it the second pass does not enter its scan loop.
 *
 * Returns 0, or the first non-zero value other than AOP_WRITEPAGE_ACTIVATE
 * returned by @writepage.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
			struct writeback_control *wbc, writepage_t writepage,
			void *data)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	int tag;
	int step = 0;

	pagevec_init(&pvec, 0);
next:
	/* (Re)compute the scan window; this runs once per step. */
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (page->index > end) {
				done = 1;
				break;
			}

			done_index = page->index;

			lock_page(page);

			/* The page no longer belongs to this mapping: skip it */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			/* Not the kind of page (cold vs. non-cold) for this step */
			if (step == is_cold_data(page))
				goto continue_unlock;

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					f2fs_wait_on_page_writeback(page, DATA);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = (*writepage)(page, wbc, data);
			if (unlikely(ret)) {
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					/*
					 * Not treated as a failure: unlock the
					 * page here and carry on with the scan.
					 */
					unlock_page(page);
					ret = 0;
				} else {
					/* Stop the walk and report this value */
					done_index = page->index + 1;
					done = 1;
					break;
				}
			}

			/* The write budget only ends the scan for WB_SYNC_NONE */
			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	/* First pass finished: run the walk again for the other page kind */
	if (step < 1) {
		step++;
		goto next;
	}

	/*
	 * A cyclic scan that started past index 0 and was not cut short
	 * wraps around once to cover [0, writeback_index - 1].
	 */
	if (!cycled && !done) {
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
1305 
1306 static int f2fs_write_data_pages(struct address_space *mapping,
1307 			    struct writeback_control *wbc)
1308 {
1309 	struct inode *inode = mapping->host;
1310 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1311 	bool locked = false;
1312 	int ret;
1313 	long diff;
1314 
1315 	trace_f2fs_writepages(mapping->host, wbc, DATA);
1316 
1317 	/* deal with chardevs and other special file */
1318 	if (!mapping->a_ops->writepage)
1319 		return 0;
1320 
1321 	/* skip writing if there is no dirty page in this inode */
1322 	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
1323 		return 0;
1324 
1325 	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
1326 			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
1327 			available_free_memory(sbi, DIRTY_DENTS))
1328 		goto skip_write;
1329 
1330 	/* during POR, we don't need to trigger writepage at all. */
1331 	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1332 		goto skip_write;
1333 
1334 	diff = nr_pages_to_write(sbi, DATA, wbc);
1335 
1336 	if (!S_ISDIR(inode->i_mode)) {
1337 		mutex_lock(&sbi->writepages);
1338 		locked = true;
1339 	}
1340 	ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
1341 	f2fs_submit_merged_bio(sbi, DATA, WRITE);
1342 	if (locked)
1343 		mutex_unlock(&sbi->writepages);
1344 
1345 	remove_dirty_dir_inode(inode);
1346 
1347 	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1348 	return ret;
1349 
1350 skip_write:
1351 	wbc->pages_skipped += get_dirty_pages(inode);
1352 	return 0;
1353 }
1354 
1355 static void f2fs_write_failed(struct address_space *mapping, loff_t to)
1356 {
1357 	struct inode *inode = mapping->host;
1358 
1359 	if (to > inode->i_size) {
1360 		truncate_pagecache(inode, inode->i_size);
1361 		truncate_blocks(inode, inode->i_size, true);
1362 	}
1363 }
1364 
/*
 * ->write_begin handler: get the page-cache page covering @pos, make sure
 * it is backed (by inline data or by a block obtained via f2fs_get_block())
 * and bring its contents into a state where the caller can copy @len bytes
 * into it.
 *
 * On success returns 0 and stores the page in *pagep.  On failure returns a
 * negative errno after putting the page and calling f2fs_write_failed()
 * for the range end pos + len.  Note that *pagep is assigned as soon as the
 * page has been grabbed and is not cleared on the failure paths.
 */
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page = NULL;
	struct page *ipage;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
	struct dnode_of_data dn;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	f2fs_balance_fs(sbi);

	/*
	 * We should check this at this moment to avoid deadlock on inode page
	 * and #0 page. The locking rule for inline_data conversion should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}
repeat:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	*pagep = page;

	f2fs_lock_op(sbi);

	/* check inline_data */
	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_fail;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		/* The write still fits inline: fill the page from the inode */
		if (pos + len <= MAX_INLINE_DATA) {
			read_inline_data(page, ipage);
			set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
			sync_inode_page(&dn);
			goto put_next;
		}
		/* Otherwise the inline data has to be converted first */
		err = f2fs_convert_inline_page(&dn, page);
		if (err)
			goto put_fail;
	}

	err = f2fs_get_block(&dn, index);
	if (err)
		goto put_fail;
put_next:
	f2fs_put_dnode(&dn);
	f2fs_unlock_op(sbi);

	f2fs_wait_on_page_writeback(page, DATA);

	/* A full-page write overwrites everything: no need to read first */
	if (len == PAGE_CACHE_SIZE)
		goto out_update;
	if (PageUptodate(page))
		goto out_clear;

	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
		unsigned end = start + len;

		/* Reading beyond i_size is simple: memset to zero */
		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
		goto out_update;
	}

	if (dn.data_blkaddr == NEW_ADDR) {
		/* No on-disk contents for this block yet: start from zeroes */
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
	} else {
		/* Partial write over existing data: read the old block in */
		struct f2fs_io_info fio = {
			.sbi = sbi,
			.type = DATA,
			.rw = READ_SYNC,
			.blk_addr = dn.data_blkaddr,
			.page = page,
			.encrypted_page = NULL,
		};
		/*
		 * NOTE(review): the fail path below puts the page; confirm
		 * that f2fs_submit_page_bio() has not already released it
		 * when it returns an error.
		 */
		err = f2fs_submit_page_bio(&fio);
		if (err)
			goto fail;

		/*
		 * The page is locked again here; presumably the read
		 * completion unlocked it -- verify against the read end_io
		 * handler.
		 */
		lock_page(page);
		if (unlikely(!PageUptodate(page))) {
			err = -EIO;
			goto fail;
		}
		/* The page left this mapping meanwhile: start over */
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}

		/* avoid symlink page */
		if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
			err = f2fs_decrypt_one(inode, page);
			if (err)
				goto fail;
		}
	}
out_update:
	SetPageUptodate(page);
out_clear:
	clear_cold_data(page);
	return 0;

	/* Error unwinding: each label releases one more resource */
put_fail:
	f2fs_put_dnode(&dn);
unlock_fail:
	f2fs_unlock_op(sbi);
fail:
	f2fs_put_page(page, 1);
	f2fs_write_failed(mapping, pos + len);
	return err;
}
1493 
1494 static int f2fs_write_end(struct file *file,
1495 			struct address_space *mapping,
1496 			loff_t pos, unsigned len, unsigned copied,
1497 			struct page *page, void *fsdata)
1498 {
1499 	struct inode *inode = page->mapping->host;
1500 
1501 	trace_f2fs_write_end(inode, pos, len, copied);
1502 
1503 	set_page_dirty(page);
1504 
1505 	if (pos + copied > i_size_read(inode)) {
1506 		i_size_write(inode, pos + copied);
1507 		mark_inode_dirty(inode);
1508 		update_inode_page(inode);
1509 	}
1510 
1511 	f2fs_put_page(page, 1);
1512 	return copied;
1513 }
1514 
1515 static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
1516 			   loff_t offset)
1517 {
1518 	unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
1519 
1520 	if (offset & blocksize_mask)
1521 		return -EINVAL;
1522 
1523 	if (iov_iter_alignment(iter) & blocksize_mask)
1524 		return -EINVAL;
1525 
1526 	return 0;
1527 }
1528 
1529 static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
1530 			      loff_t offset)
1531 {
1532 	struct file *file = iocb->ki_filp;
1533 	struct address_space *mapping = file->f_mapping;
1534 	struct inode *inode = mapping->host;
1535 	size_t count = iov_iter_count(iter);
1536 	int err;
1537 
1538 	/* we don't need to use inline_data strictly */
1539 	if (f2fs_has_inline_data(inode)) {
1540 		err = f2fs_convert_inline_inode(inode);
1541 		if (err)
1542 			return err;
1543 	}
1544 
1545 	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
1546 		return 0;
1547 
1548 	err = check_direct_IO(inode, iter, offset);
1549 	if (err)
1550 		return err;
1551 
1552 	trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
1553 
1554 	if (iov_iter_rw(iter) == WRITE)
1555 		__allocate_data_blocks(inode, offset, count);
1556 
1557 	err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
1558 	if (err < 0 && iov_iter_rw(iter) == WRITE)
1559 		f2fs_write_failed(mapping, offset + count);
1560 
1561 	trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err);
1562 
1563 	return err;
1564 }
1565 
1566 void f2fs_invalidate_page(struct page *page, unsigned int offset,
1567 							unsigned int length)
1568 {
1569 	struct inode *inode = page->mapping->host;
1570 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1571 
1572 	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
1573 		(offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE))
1574 		return;
1575 
1576 	if (PageDirty(page)) {
1577 		if (inode->i_ino == F2FS_META_INO(sbi))
1578 			dec_page_count(sbi, F2FS_DIRTY_META);
1579 		else if (inode->i_ino == F2FS_NODE_INO(sbi))
1580 			dec_page_count(sbi, F2FS_DIRTY_NODES);
1581 		else
1582 			inode_dec_dirty_pages(inode);
1583 	}
1584 
1585 	/* This is atomic written page, keep Private */
1586 	if (IS_ATOMIC_WRITTEN_PAGE(page))
1587 		return;
1588 
1589 	ClearPagePrivate(page);
1590 }
1591 
1592 int f2fs_release_page(struct page *page, gfp_t wait)
1593 {
1594 	/* If this is dirty page, keep PagePrivate */
1595 	if (PageDirty(page))
1596 		return 0;
1597 
1598 	/* This is atomic written page, keep Private */
1599 	if (IS_ATOMIC_WRITTEN_PAGE(page))
1600 		return 0;
1601 
1602 	ClearPagePrivate(page);
1603 	return 1;
1604 }
1605 
1606 static int f2fs_set_data_page_dirty(struct page *page)
1607 {
1608 	struct address_space *mapping = page->mapping;
1609 	struct inode *inode = mapping->host;
1610 
1611 	trace_f2fs_set_page_dirty(page, DATA);
1612 
1613 	SetPageUptodate(page);
1614 
1615 	if (f2fs_is_atomic_file(inode)) {
1616 		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
1617 			register_inmem_page(inode, page);
1618 			return 1;
1619 		}
1620 		/*
1621 		 * Previously, this page has been registered, we just
1622 		 * return here.
1623 		 */
1624 		return 0;
1625 	}
1626 
1627 	if (!PageDirty(page)) {
1628 		__set_page_dirty_nobuffers(page);
1629 		update_dirty_page(inode, page);
1630 		return 1;
1631 	}
1632 	return 0;
1633 }
1634 
1635 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
1636 {
1637 	struct inode *inode = mapping->host;
1638 
1639 	/* we don't need to use inline_data strictly */
1640 	if (f2fs_has_inline_data(inode)) {
1641 		int err = f2fs_convert_inline_inode(inode);
1642 		if (err)
1643 			return err;
1644 	}
1645 	return generic_block_bmap(mapping, block, get_data_block_bmap);
1646 }
1647 
1648 const struct address_space_operations f2fs_dblock_aops = {
1649 	.readpage	= f2fs_read_data_page,
1650 	.readpages	= f2fs_read_data_pages,
1651 	.writepage	= f2fs_write_data_page,
1652 	.writepages	= f2fs_write_data_pages,
1653 	.write_begin	= f2fs_write_begin,
1654 	.write_end	= f2fs_write_end,
1655 	.set_page_dirty	= f2fs_set_data_page_dirty,
1656 	.invalidatepage	= f2fs_invalidate_page,
1657 	.releasepage	= f2fs_release_page,
1658 	.direct_IO	= f2fs_direct_IO,
1659 	.bmap		= f2fs_bmap,
1660 };
1661