xref: /linux/fs/f2fs/data.c (revision 3932b9ca55b0be314a36d3e84faff3e823c081f5)
1 /*
2  * fs/f2fs/data.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/buffer_head.h>
14 #include <linux/mpage.h>
15 #include <linux/aio.h>
16 #include <linux/writeback.h>
17 #include <linux/backing-dev.h>
18 #include <linux/blkdev.h>
19 #include <linux/bio.h>
20 #include <linux/prefetch.h>
21 
22 #include "f2fs.h"
23 #include "node.h"
24 #include "segment.h"
25 #include <trace/events/f2fs.h>
26 
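/*
 * Read completion handler: on success mark each page in the bio up-to-date;
 * on error clear the flag and set a page error; then unlock the page.
 */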
27 static void f2fs_read_end_io(struct bio *bio, int err)
28 {
29 	struct bio_vec *bvec;
30 	int i;
31 
32 	bio_for_each_segment_all(bvec, bio, i) {
33 		struct page *page = bvec->bv_page;
34 
35 		if (!err) {
36 			SetPageUptodate(page);
37 		} else {
38 			ClearPageUptodate(page);
39 			SetPageError(page);
40 		}
41 		unlock_page(page);
42 	}
43 	bio_put(bio);
44 }
45 
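/*
 * Write completion handler: on error, redirty the page, record -EIO on its
 * mapping and stop checkpointing.  Always end writeback, drop the in-flight
 * writeback count, and wake up any checkpoint waiter once it reaches zero.
 */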
46 static void f2fs_write_end_io(struct bio *bio, int err)
47 {
48 	struct f2fs_sb_info *sbi = bio->bi_private;
49 	struct bio_vec *bvec;
50 	int i;
51 
52 	bio_for_each_segment_all(bvec, bio, i) {
53 		struct page *page = bvec->bv_page;
54 
55 		if (unlikely(err)) {
56 			set_page_dirty(page);
57 			set_bit(AS_EIO, &page->mapping->flags);
58 			f2fs_stop_checkpoint(sbi);
59 		}
60 		end_page_writeback(page);
61 		dec_page_count(sbi, F2FS_WRITEBACK);
62 	}
63 
64 	if (sbi->wait_io) {
65 		complete(sbi->wait_io);
66 		sbi->wait_io = NULL;
67 	}
68 
69 	if (!get_pages(sbi, F2FS_WRITEBACK) &&
70 			!list_empty(&sbi->cp_wait.task_list))
71 		wake_up(&sbi->cp_wait);
72 
73 	bio_put(bio);
74 }
75 
76 /*
77  * Low-level block read/write IO operations.
78  */
79 static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
80 				int npages, bool is_read)
81 {
82 	struct bio *bio;
83 
84 	/* No failure on bio allocation */
85 	bio = bio_alloc(GFP_NOIO, npages);
86 
87 	bio->bi_bdev = sbi->sb->s_bdev;
88 	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
89 	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
90 	bio->bi_private = sbi;
91 
92 	return bio;
93 }
94 
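/*
 * Submit the bio cached in @io, if any.  A META_FLUSH bio comes from the
 * checkpoint procedure and is waited for synchronously via sbi->wait_io.
 */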
95 static void __submit_merged_bio(struct f2fs_bio_info *io)
96 {
97 	struct f2fs_io_info *fio = &io->fio;
98 	int rw;
99 
100 	if (!io->bio)
101 		return;
102 
103 	rw = fio->rw;
104 
105 	if (is_read_io(rw)) {
106 		trace_f2fs_submit_read_bio(io->sbi->sb, rw,
107 						fio->type, io->bio);
108 		submit_bio(rw, io->bio);
109 	} else {
110 		trace_f2fs_submit_write_bio(io->sbi->sb, rw,
111 						fio->type, io->bio);
112 		/*
113 		 * META_FLUSH comes only from the checkpoint procedure, and we
114 		 * should wait on this metadata bio for FS consistency.
115 		 */
116 		if (fio->type == META_FLUSH) {
117 			DECLARE_COMPLETION_ONSTACK(wait);
118 			io->sbi->wait_io = &wait;
119 			submit_bio(rw, io->bio);
120 			wait_for_completion(&wait);
121 		} else {
122 			submit_bio(rw, io->bio);
123 		}
124 	}
125 
126 	io->bio = NULL;
127 }
128 
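/*
 * Flush the merged bio of the given page type.  In the checkpoint procedure
 * META is promoted to META_FLUSH and issued with flush semantics (plus FUA
 * unless the nobarrier mount option is set).
 */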
129 void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
130 				enum page_type type, int rw)
131 {
132 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
133 	struct f2fs_bio_info *io;
134 
135 	io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
136 
137 	down_write(&io->io_rwsem);
138 
139 	/* change META to META_FLUSH in the checkpoint procedure */
140 	if (type >= META_FLUSH) {
141 		io->fio.type = META_FLUSH;
142 		if (test_opt(sbi, NOBARRIER))
143 			io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
144 		else
145 			io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
146 	}
147 	__submit_merged_bio(io);
148 	up_write(&io->io_rwsem);
149 }
150 
151 /*
152  * Fill the locked page with data located at the given block address.
153  * The page is returned unlocked.
154  */
155 int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
156 					block_t blk_addr, int rw)
157 {
158 	struct bio *bio;
159 
160 	trace_f2fs_submit_page_bio(page, blk_addr, rw);
161 
162 	/* Allocate a new bio */
163 	bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw));
164 
165 	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
166 		bio_put(bio);
167 		f2fs_put_page(page, 1);
168 		return -EFAULT;
169 	}
170 
171 	submit_bio(rw, bio);
172 	return 0;
173 }
174 
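/*
 * Merge the page into the per-type cached bio.  If the page is not contiguous
 * with the cached bio, or the I/O attributes differ, the cached bio is
 * submitted first and a new one is allocated.
 */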
175 void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
176 			block_t blk_addr, struct f2fs_io_info *fio)
177 {
178 	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
179 	struct f2fs_bio_info *io;
180 	bool is_read = is_read_io(fio->rw);
181 
182 	io = is_read ? &sbi->read_io : &sbi->write_io[btype];
183 
184 	verify_block_addr(sbi, blk_addr);
185 
186 	down_write(&io->io_rwsem);
187 
188 	if (!is_read)
189 		inc_page_count(sbi, F2FS_WRITEBACK);
190 
191 	if (io->bio && (io->last_block_in_bio != blk_addr - 1 ||
192 						io->fio.rw != fio->rw))
193 		__submit_merged_bio(io);
194 alloc_new:
195 	if (io->bio == NULL) {
196 		int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
197 
198 		io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
199 		io->fio = *fio;
200 	}
201 
202 	if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
203 							PAGE_CACHE_SIZE) {
204 		__submit_merged_bio(io);
205 		goto alloc_new;
206 	}
207 
208 	io->last_block_in_bio = blk_addr;
209 
210 	up_write(&io->io_rwsem);
211 	trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
212 }
213 
214 /*
215  * Lock ordering for the change of data block address:
216  * ->data_page
217  *  ->node_page
218  *    update block addresses in the node page
219  */
220 static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
221 {
222 	struct f2fs_node *rn;
223 	__le32 *addr_array;
224 	struct page *node_page = dn->node_page;
225 	unsigned int ofs_in_node = dn->ofs_in_node;
226 
227 	f2fs_wait_on_page_writeback(node_page, NODE);
228 
229 	rn = F2FS_NODE(node_page);
230 
231 	/* Get physical address of data block */
232 	addr_array = blkaddr_in_node(rn);
233 	addr_array[ofs_in_node] = cpu_to_le32(new_addr);
234 	set_page_dirty(node_page);
235 }
236 
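/*
 * Reserve one new data block for the dnode: fail for FI_NO_ALLOC inodes or
 * when no space is left, otherwise record NEW_ADDR in the node page and in
 * dn->data_blkaddr.
 */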
237 int reserve_new_block(struct dnode_of_data *dn)
238 {
239 	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
240 
241 	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
242 		return -EPERM;
243 	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
244 		return -ENOSPC;
245 
246 	trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
247 
248 	__set_data_blkaddr(dn, NEW_ADDR);
249 	dn->data_blkaddr = NEW_ADDR;
250 	mark_inode_dirty(dn->inode);
251 	sync_inode_page(dn);
252 	return 0;
253 }
254 
255 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
256 {
257 	bool need_put = dn->inode_page ? false : true;
258 	int err;
259 
260 	/* if inode_page exists, index should be zero */
261 	f2fs_bug_on(!need_put && index);
262 
263 	err = get_dnode_of_data(dn, index, ALLOC_NODE);
264 	if (err)
265 		return err;
266 
267 	if (dn->data_blkaddr == NULL_ADDR)
268 		err = reserve_new_block(dn);
269 	if (err || need_put)
270 		f2fs_put_dnode(dn);
271 	return err;
272 }
273 
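/*
 * Look up @pgofs in the inode's single-extent cache.  On a hit, map
 * @bh_result to the cached block range and return 1; otherwise return 0.
 */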
274 static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
275 					struct buffer_head *bh_result)
276 {
277 	struct f2fs_inode_info *fi = F2FS_I(inode);
278 	pgoff_t start_fofs, end_fofs;
279 	block_t start_blkaddr;
280 
281 	if (is_inode_flag_set(fi, FI_NO_EXTENT))
282 		return 0;
283 
284 	read_lock(&fi->ext.ext_lock);
285 	if (fi->ext.len == 0) {
286 		read_unlock(&fi->ext.ext_lock);
287 		return 0;
288 	}
289 
290 	stat_inc_total_hit(inode->i_sb);
291 
292 	start_fofs = fi->ext.fofs;
293 	end_fofs = fi->ext.fofs + fi->ext.len - 1;
294 	start_blkaddr = fi->ext.blk_addr;
295 
296 	if (pgofs >= start_fofs && pgofs <= end_fofs) {
297 		unsigned int blkbits = inode->i_sb->s_blocksize_bits;
298 		size_t count;
299 
300 		clear_buffer_new(bh_result);
301 		map_bh(bh_result, inode->i_sb,
302 				start_blkaddr + pgofs - start_fofs);
303 		count = end_fofs - pgofs + 1;
304 		if (count < (UINT_MAX >> blkbits))
305 			bh_result->b_size = (count << blkbits);
306 		else
307 			bh_result->b_size = UINT_MAX;
308 
309 		stat_inc_read_hit(inode->i_sb);
310 		read_unlock(&fi->ext.ext_lock);
311 		return 1;
312 	}
313 	read_unlock(&fi->ext.ext_lock);
314 	return 0;
315 }
316 
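/*
 * Record @blk_addr in the node page and then grow, shrink, split or drop the
 * in-memory extent cache so that it still describes one contiguous run.
 */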
317 void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
318 {
319 	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
320 	pgoff_t fofs, start_fofs, end_fofs;
321 	block_t start_blkaddr, end_blkaddr;
322 	int need_update = true;
323 
324 	f2fs_bug_on(blk_addr == NEW_ADDR);
325 	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
326 							dn->ofs_in_node;
327 
328 	/* Update the page address in the parent node */
329 	__set_data_blkaddr(dn, blk_addr);
330 
331 	if (is_inode_flag_set(fi, FI_NO_EXTENT))
332 		return;
333 
334 	write_lock(&fi->ext.ext_lock);
335 
336 	start_fofs = fi->ext.fofs;
337 	end_fofs = fi->ext.fofs + fi->ext.len - 1;
338 	start_blkaddr = fi->ext.blk_addr;
339 	end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1;
340 
341 	/* Drop and initialize the matched extent */
342 	if (fi->ext.len == 1 && fofs == start_fofs)
343 		fi->ext.len = 0;
344 
345 	/* Initial extent */
346 	if (fi->ext.len == 0) {
347 		if (blk_addr != NULL_ADDR) {
348 			fi->ext.fofs = fofs;
349 			fi->ext.blk_addr = blk_addr;
350 			fi->ext.len = 1;
351 		}
352 		goto end_update;
353 	}
354 
355 	/* Front merge */
356 	if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) {
357 		fi->ext.fofs--;
358 		fi->ext.blk_addr--;
359 		fi->ext.len++;
360 		goto end_update;
361 	}
362 
363 	/* Back merge */
364 	if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) {
365 		fi->ext.len++;
366 		goto end_update;
367 	}
368 
369 	/* Split the existing extent */
370 	if (fi->ext.len > 1 &&
371 		fofs >= start_fofs && fofs <= end_fofs) {
372 		if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
373 			fi->ext.len = fofs - start_fofs;
374 		} else {
375 			fi->ext.fofs = fofs + 1;
376 			fi->ext.blk_addr = start_blkaddr +
377 					fofs - start_fofs + 1;
378 			fi->ext.len -= fofs - start_fofs + 1;
379 		}
380 	} else {
381 		need_update = false;
382 	}
383 
384 	/* Finally, if the extent is very fragmented, let's drop the cache. */
385 	if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
386 		fi->ext.len = 0;
387 		set_inode_flag(fi, FI_NO_EXTENT);
388 		need_update = true;
389 	}
390 end_update:
391 	write_unlock(&fi->ext.ext_lock);
392 	if (need_update)
393 		sync_inode_page(dn);
394 	return;
395 }
396 
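/*
 * Find the data page at @index without locking it, reading it from disk when
 * no up-to-date cached copy exists.  A hole returns -ENOENT and an unwritten
 * fallocated block returns -EINVAL.
 */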
397 struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
398 {
399 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
400 	struct address_space *mapping = inode->i_mapping;
401 	struct dnode_of_data dn;
402 	struct page *page;
403 	int err;
404 
405 	page = find_get_page(mapping, index);
406 	if (page && PageUptodate(page))
407 		return page;
408 	f2fs_put_page(page, 0);
409 
410 	set_new_dnode(&dn, inode, NULL, NULL, 0);
411 	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
412 	if (err)
413 		return ERR_PTR(err);
414 	f2fs_put_dnode(&dn);
415 
416 	if (dn.data_blkaddr == NULL_ADDR)
417 		return ERR_PTR(-ENOENT);
418 
419 	/* A block preallocated by fallocate() has NEW_ADDR but no cached page */
420 	if (unlikely(dn.data_blkaddr == NEW_ADDR))
421 		return ERR_PTR(-EINVAL);
422 
423 	page = grab_cache_page(mapping, index);
424 	if (!page)
425 		return ERR_PTR(-ENOMEM);
426 
427 	if (PageUptodate(page)) {
428 		unlock_page(page);
429 		return page;
430 	}
431 
432 	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
433 					sync ? READ_SYNC : READA);
434 	if (err)
435 		return ERR_PTR(err);
436 
437 	if (sync) {
438 		wait_on_page_locked(page);
439 		if (unlikely(!PageUptodate(page))) {
440 			f2fs_put_page(page, 0);
441 			return ERR_PTR(-EIO);
442 		}
443 	}
444 	return page;
445 }
446 
447 /*
448  * If it tries to access a hole, return an error, because the callers
449  * (functions in dir.c and GC) should be able to know whether the page
450  * exists or not.
451  */
452 struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
453 {
454 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
455 	struct address_space *mapping = inode->i_mapping;
456 	struct dnode_of_data dn;
457 	struct page *page;
458 	int err;
459 
460 repeat:
461 	page = grab_cache_page(mapping, index);
462 	if (!page)
463 		return ERR_PTR(-ENOMEM);
464 
465 	set_new_dnode(&dn, inode, NULL, NULL, 0);
466 	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
467 	if (err) {
468 		f2fs_put_page(page, 1);
469 		return ERR_PTR(err);
470 	}
471 	f2fs_put_dnode(&dn);
472 
473 	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
474 		f2fs_put_page(page, 1);
475 		return ERR_PTR(-ENOENT);
476 	}
477 
478 	if (PageUptodate(page))
479 		return page;
480 
481 	/*
482 	 * A new dentry page was allocated but could not be written, because its
483 	 * new inode page couldn't be allocated due to -ENOSPC.
484 	 * In that case, its blkaddr remains NEW_ADDR.
485 	 * See f2fs_add_link -> get_new_data_page -> init_inode_metadata.
486 	 */
487 	if (dn.data_blkaddr == NEW_ADDR) {
488 		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
489 		SetPageUptodate(page);
490 		return page;
491 	}
492 
493 	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);
494 	if (err)
495 		return ERR_PTR(err);
496 
497 	lock_page(page);
498 	if (unlikely(!PageUptodate(page))) {
499 		f2fs_put_page(page, 1);
500 		return ERR_PTR(-EIO);
501 	}
502 	if (unlikely(page->mapping != mapping)) {
503 		f2fs_put_page(page, 1);
504 		goto repeat;
505 	}
506 	return page;
507 }
508 
509 /*
510  * The caller ensures that this data page has not been allocated before.
511  * A new zero-filled data page is allocated in the page cache.
512  *
513  * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
514  * f2fs_unlock_op().
515  * Note that ipage is set only by make_empty_dir.
516  */
517 struct page *get_new_data_page(struct inode *inode,
518 		struct page *ipage, pgoff_t index, bool new_i_size)
519 {
520 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
521 	struct address_space *mapping = inode->i_mapping;
522 	struct page *page;
523 	struct dnode_of_data dn;
524 	int err;
525 
526 	set_new_dnode(&dn, inode, ipage, NULL, 0);
527 	err = f2fs_reserve_block(&dn, index);
528 	if (err)
529 		return ERR_PTR(err);
530 repeat:
531 	page = grab_cache_page(mapping, index);
532 	if (!page) {
533 		err = -ENOMEM;
534 		goto put_err;
535 	}
536 
537 	if (PageUptodate(page))
538 		return page;
539 
540 	if (dn.data_blkaddr == NEW_ADDR) {
541 		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
542 		SetPageUptodate(page);
543 	} else {
544 		err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
545 								READ_SYNC);
546 		if (err)
547 			goto put_err;
548 
549 		lock_page(page);
550 		if (unlikely(!PageUptodate(page))) {
551 			f2fs_put_page(page, 1);
552 			err = -EIO;
553 			goto put_err;
554 		}
555 		if (unlikely(page->mapping != mapping)) {
556 			f2fs_put_page(page, 1);
557 			goto repeat;
558 		}
559 	}
560 
561 	if (new_i_size &&
562 		i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
563 		i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
564 		/* Only the directory inode sets new_i_size */
565 		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
566 	}
567 	return page;
568 
569 put_err:
570 	f2fs_put_dnode(&dn);
571 	return ERR_PTR(err);
572 }
573 
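/*
 * Preallocate one data block (used by the direct-IO path): charge the block
 * count, allocate a new address from the warm data log and store it in the
 * dnode.
 */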
574 static int __allocate_data_block(struct dnode_of_data *dn)
575 {
576 	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
577 	struct f2fs_summary sum;
578 	block_t new_blkaddr;
579 	struct node_info ni;
580 	int type;
581 
582 	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
583 		return -EPERM;
584 	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
585 		return -ENOSPC;
586 
587 	__set_data_blkaddr(dn, NEW_ADDR);
588 	dn->data_blkaddr = NEW_ADDR;
589 
590 	get_node_info(sbi, dn->nid, &ni);
591 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
592 
593 	type = CURSEG_WARM_DATA;
594 
595 	allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
596 
597 	/* direct I/O doesn't use the extent cache, to maximize performance */
598 	set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
599 	update_extent_cache(new_blkaddr, dn);
600 	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
601 
602 	dn->data_blkaddr = new_blkaddr;
603 	return 0;
604 }
605 
606 /*
607  * get_data_block() now supports readahead/bmap/rw direct_IO with a mapped bh.
608  * If original data blocks are allocated, then give them to blockdev.
609  * Otherwise,
610  *     a. preallocate requested block addresses
611  *     b. do not use extent cache for better performance
612  *     c. give the block addresses to blockdev
613  */
614 static int __get_data_block(struct inode *inode, sector_t iblock,
615 			struct buffer_head *bh_result, int create, bool fiemap)
616 {
617 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
618 	unsigned int blkbits = inode->i_sb->s_blocksize_bits;
619 	unsigned maxblocks = bh_result->b_size >> blkbits;
620 	struct dnode_of_data dn;
621 	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
622 	pgoff_t pgofs, end_offset;
623 	int err = 0, ofs = 1;
624 	bool allocated = false;
625 
626 	/* Get the page offset from the block offset (iblock) */
627 	pgofs =	(pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
628 
629 	if (check_extent_cache(inode, pgofs, bh_result))
630 		goto out;
631 
632 	if (create) {
633 		f2fs_balance_fs(sbi);
634 		f2fs_lock_op(sbi);
635 	}
636 
637 	/* When reading holes, we need the corresponding node page */
638 	set_new_dnode(&dn, inode, NULL, NULL, 0);
639 	err = get_dnode_of_data(&dn, pgofs, mode);
640 	if (err) {
641 		if (err == -ENOENT)
642 			err = 0;
643 		goto unlock_out;
644 	}
645 	if (dn.data_blkaddr == NEW_ADDR && !fiemap)
646 		goto put_out;
647 
648 	if (dn.data_blkaddr != NULL_ADDR) {
649 		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
650 	} else if (create) {
651 		err = __allocate_data_block(&dn);
652 		if (err)
653 			goto put_out;
654 		allocated = true;
655 		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
656 	} else {
657 		goto put_out;
658 	}
659 
660 	end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
661 	bh_result->b_size = (((size_t)1) << blkbits);
662 	dn.ofs_in_node++;
663 	pgofs++;
664 
665 get_next:
666 	if (dn.ofs_in_node >= end_offset) {
667 		if (allocated)
668 			sync_inode_page(&dn);
669 		allocated = false;
670 		f2fs_put_dnode(&dn);
671 
672 		set_new_dnode(&dn, inode, NULL, NULL, 0);
673 		err = get_dnode_of_data(&dn, pgofs, mode);
674 		if (err) {
675 			if (err == -ENOENT)
676 				err = 0;
677 			goto unlock_out;
678 		}
679 		if (dn.data_blkaddr == NEW_ADDR && !fiemap)
680 			goto put_out;
681 
682 		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
683 	}
684 
685 	if (maxblocks > (bh_result->b_size >> blkbits)) {
686 		block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
687 		if (blkaddr == NULL_ADDR && create) {
688 			err = __allocate_data_block(&dn);
689 			if (err)
690 				goto sync_out;
691 			allocated = true;
692 			blkaddr = dn.data_blkaddr;
693 		}
694 		/* Give more consecutive addresses for the readahead */
695 		if (blkaddr == (bh_result->b_blocknr + ofs)) {
696 			ofs++;
697 			dn.ofs_in_node++;
698 			pgofs++;
699 			bh_result->b_size += (((size_t)1) << blkbits);
700 			goto get_next;
701 		}
702 	}
703 sync_out:
704 	if (allocated)
705 		sync_inode_page(&dn);
706 put_out:
707 	f2fs_put_dnode(&dn);
708 unlock_out:
709 	if (create)
710 		f2fs_unlock_op(sbi);
711 out:
712 	trace_f2fs_get_data_block(inode, iblock, bh_result, err);
713 	return err;
714 }
715 
716 static int get_data_block(struct inode *inode, sector_t iblock,
717 			struct buffer_head *bh_result, int create)
718 {
719 	return __get_data_block(inode, iblock, bh_result, create, false);
720 }
721 
722 static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
723 			struct buffer_head *bh_result, int create)
724 {
725 	return __get_data_block(inode, iblock, bh_result, create, true);
726 }
727 
728 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
729 		u64 start, u64 len)
730 {
731 	return generic_block_fiemap(inode, fieinfo,
732 				start, len, get_data_block_fiemap);
733 }
734 
735 static int f2fs_read_data_page(struct file *file, struct page *page)
736 {
737 	struct inode *inode = page->mapping->host;
738 	int ret;
739 
740 	trace_f2fs_readpage(page, DATA);
741 
742 	/* If the file has inline data, try to read it directly */
743 	if (f2fs_has_inline_data(inode))
744 		ret = f2fs_read_inline_data(inode, page);
745 	else
746 		ret = mpage_readpage(page, get_data_block);
747 
748 	return ret;
749 }
750 
751 static int f2fs_read_data_pages(struct file *file,
752 			struct address_space *mapping,
753 			struct list_head *pages, unsigned nr_pages)
754 {
755 	struct inode *inode = file->f_mapping->host;
756 
757 	/* If the file has inline data, skip readpages */
758 	if (f2fs_has_inline_data(inode))
759 		return 0;
760 
761 	return mpage_readpages(mapping, pages, nr_pages, get_data_block);
762 }
763 
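/*
 * Write out one data page: if an in-place update is preferred (e.g. under
 * SSR) and the data is not cold, rewrite the existing block; otherwise write
 * to a newly allocated block and update the extent cache.
 */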
764 int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
765 {
766 	struct inode *inode = page->mapping->host;
767 	block_t old_blkaddr, new_blkaddr;
768 	struct dnode_of_data dn;
769 	int err = 0;
770 
771 	set_new_dnode(&dn, inode, NULL, NULL, 0);
772 	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
773 	if (err)
774 		return err;
775 
776 	old_blkaddr = dn.data_blkaddr;
777 
778 	/* This page is already truncated */
779 	if (old_blkaddr == NULL_ADDR)
780 		goto out_writepage;
781 
782 	set_page_writeback(page);
783 
784 	/*
785 	 * If the current allocation needs SSR,
786 	 * it is better to do in-place writes for the updated data.
787 	 */
788 	if (unlikely(old_blkaddr != NEW_ADDR &&
789 			!is_cold_data(page) &&
790 			need_inplace_update(inode))) {
791 		rewrite_data_page(page, old_blkaddr, fio);
792 		set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
793 	} else {
794 		write_data_page(page, &dn, &new_blkaddr, fio);
795 		update_extent_cache(new_blkaddr, &dn);
796 		set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
797 	}
798 out_writepage:
799 	f2fs_put_dnode(&dn);
800 	return err;
801 }
802 
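/*
 * Write back one dirty data page: the part of the last page beyond i_size is
 * zeroed, pages entirely past EOF are skipped, and the page is redirtied when
 * writeback has to be deferred (e.g. during recovery).
 */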
803 static int f2fs_write_data_page(struct page *page,
804 					struct writeback_control *wbc)
805 {
806 	struct inode *inode = page->mapping->host;
807 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
808 	loff_t i_size = i_size_read(inode);
809 	const pgoff_t end_index = ((unsigned long long) i_size)
810 							>> PAGE_CACHE_SHIFT;
811 	unsigned offset = 0;
812 	bool need_balance_fs = false;
813 	int err = 0;
814 	struct f2fs_io_info fio = {
815 		.type = DATA,
816 		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
817 	};
818 
819 	trace_f2fs_writepage(page, DATA);
820 
821 	if (page->index < end_index)
822 		goto write;
823 
824 	/*
825 	 * If the offset is beyond the file size,
826 	 * this page does not have to be written to disk.
827 	 */
828 	offset = i_size & (PAGE_CACHE_SIZE - 1);
829 	if ((page->index >= end_index + 1) || !offset)
830 		goto out;
831 
832 	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
833 write:
834 	if (unlikely(sbi->por_doing))
835 		goto redirty_out;
836 
837 	/* Dentry blocks are controlled by checkpoint */
838 	if (S_ISDIR(inode->i_mode)) {
839 		if (unlikely(f2fs_cp_error(sbi)))
840 			goto redirty_out;
841 		err = do_write_data_page(page, &fio);
842 		goto done;
843 	}
844 
845 	/* we should bypass data pages to let the kworker jobs proceed */
846 	if (unlikely(f2fs_cp_error(sbi))) {
847 		SetPageError(page);
848 		unlock_page(page);
849 		return 0;
850 	}
851 
852 	if (!wbc->for_reclaim)
853 		need_balance_fs = true;
854 	else if (has_not_enough_free_secs(sbi, 0))
855 		goto redirty_out;
856 
857 	f2fs_lock_op(sbi);
858 	if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
859 		err = f2fs_write_inline_data(inode, page, offset);
860 	else
861 		err = do_write_data_page(page, &fio);
862 	f2fs_unlock_op(sbi);
863 done:
864 	if (err && err != -ENOENT)
865 		goto redirty_out;
866 
867 	clear_cold_data(page);
868 out:
869 	inode_dec_dirty_dents(inode);
870 	unlock_page(page);
871 	if (need_balance_fs)
872 		f2fs_balance_fs(sbi);
873 	if (wbc->for_reclaim)
874 		f2fs_submit_merged_bio(sbi, DATA, WRITE);
875 	return 0;
876 
877 redirty_out:
878 	redirty_page_for_writepage(wbc, page);
879 	return AOP_WRITEPAGE_ACTIVATE;
880 }
881 
882 static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
883 			void *data)
884 {
885 	struct address_space *mapping = data;
886 	int ret = mapping->a_ops->writepage(page, wbc);
887 	mapping_set_error(mapping, ret);
888 	return ret;
889 }
890 
891 static int f2fs_write_data_pages(struct address_space *mapping,
892 			    struct writeback_control *wbc)
893 {
894 	struct inode *inode = mapping->host;
895 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
896 	bool locked = false;
897 	int ret;
898 	long diff;
899 
900 	trace_f2fs_writepages(mapping->host, wbc, DATA);
901 
902 	/* deal with chardevs and other special files */
903 	if (!mapping->a_ops->writepage)
904 		return 0;
905 
906 	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
907 			get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) &&
908 			available_free_memory(sbi, DIRTY_DENTS))
909 		goto skip_write;
910 
911 	diff = nr_pages_to_write(sbi, DATA, wbc);
912 
913 	if (!S_ISDIR(inode->i_mode)) {
914 		mutex_lock(&sbi->writepages);
915 		locked = true;
916 	}
917 	ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
918 	if (locked)
919 		mutex_unlock(&sbi->writepages);
920 
921 	f2fs_submit_merged_bio(sbi, DATA, WRITE);
922 
923 	remove_dirty_dir_inode(inode);
924 
925 	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
926 	return ret;
927 
928 skip_write:
929 	wbc->pages_skipped += get_dirty_dents(inode);
930 	return 0;
931 }
932 
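/*
 * A failed or short write may have allocated blocks past i_size; drop the
 * pagecache and truncate those blocks back to the current file size.
 */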
933 static void f2fs_write_failed(struct address_space *mapping, loff_t to)
934 {
935 	struct inode *inode = mapping->host;
936 
937 	if (to > inode->i_size) {
938 		truncate_pagecache(inode, inode->i_size);
939 		truncate_blocks(inode, inode->i_size, true);
940 	}
941 }
942 
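/*
 * Prepare a page for a write of @len bytes at @pos: convert inline data when
 * it would overflow, reserve the data block, and read or zero the page unless
 * it will be completely overwritten.
 */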
943 static int f2fs_write_begin(struct file *file, struct address_space *mapping,
944 		loff_t pos, unsigned len, unsigned flags,
945 		struct page **pagep, void **fsdata)
946 {
947 	struct inode *inode = mapping->host;
948 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
949 	struct page *page;
950 	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
951 	struct dnode_of_data dn;
952 	int err = 0;
953 
954 	trace_f2fs_write_begin(inode, pos, len, flags);
955 
956 	f2fs_balance_fs(sbi);
957 repeat:
958 	err = f2fs_convert_inline_data(inode, pos + len, NULL);
959 	if (err)
960 		goto fail;
961 
962 	page = grab_cache_page_write_begin(mapping, index, flags);
963 	if (!page) {
964 		err = -ENOMEM;
965 		goto fail;
966 	}
967 
968 	/* to avoid latency during memory pressure */
969 	unlock_page(page);
970 
971 	*pagep = page;
972 
973 	if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
974 		goto inline_data;
975 
976 	f2fs_lock_op(sbi);
977 	set_new_dnode(&dn, inode, NULL, NULL, 0);
978 	err = f2fs_reserve_block(&dn, index);
979 	f2fs_unlock_op(sbi);
980 	if (err) {
981 		f2fs_put_page(page, 0);
982 		goto fail;
983 	}
984 inline_data:
985 	lock_page(page);
986 	if (unlikely(page->mapping != mapping)) {
987 		f2fs_put_page(page, 1);
988 		goto repeat;
989 	}
990 
991 	f2fs_wait_on_page_writeback(page, DATA);
992 
993 	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
994 		return 0;
995 
996 	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
997 		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
998 		unsigned end = start + len;
999 
1000 		/* Reading beyond i_size is simple: memset to zero */
1001 		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
1002 		goto out;
1003 	}
1004 
1005 	if (dn.data_blkaddr == NEW_ADDR) {
1006 		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
1007 	} else {
1008 		if (f2fs_has_inline_data(inode)) {
1009 			err = f2fs_read_inline_data(inode, page);
1010 			if (err) {
1011 				page_cache_release(page);
1012 				goto fail;
1013 			}
1014 		} else {
1015 			err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
1016 							READ_SYNC);
1017 			if (err)
1018 				goto fail;
1019 		}
1020 
1021 		lock_page(page);
1022 		if (unlikely(!PageUptodate(page))) {
1023 			f2fs_put_page(page, 1);
1024 			err = -EIO;
1025 			goto fail;
1026 		}
1027 		if (unlikely(page->mapping != mapping)) {
1028 			f2fs_put_page(page, 1);
1029 			goto repeat;
1030 		}
1031 	}
1032 out:
1033 	SetPageUptodate(page);
1034 	clear_cold_data(page);
1035 	return 0;
1036 fail:
1037 	f2fs_write_failed(mapping, pos + len);
1038 	return err;
1039 }
1040 
1041 static int f2fs_write_end(struct file *file,
1042 			struct address_space *mapping,
1043 			loff_t pos, unsigned len, unsigned copied,
1044 			struct page *page, void *fsdata)
1045 {
1046 	struct inode *inode = page->mapping->host;
1047 
1048 	trace_f2fs_write_end(inode, pos, len, copied);
1049 
1050 	set_page_dirty(page);
1051 
1052 	if (pos + copied > i_size_read(inode)) {
1053 		i_size_write(inode, pos + copied);
1054 		mark_inode_dirty(inode);
1055 		update_inode_page(inode);
1056 	}
1057 
1058 	f2fs_put_page(page, 1);
1059 	return copied;
1060 }
1061 
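/*
 * Direct-IO writes must be block-aligned in both file offset and memory
 * buffer; reads are always accepted here.
 */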
1062 static int check_direct_IO(struct inode *inode, int rw,
1063 		struct iov_iter *iter, loff_t offset)
1064 {
1065 	unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
1066 
1067 	if (rw == READ)
1068 		return 0;
1069 
1070 	if (offset & blocksize_mask)
1071 		return -EINVAL;
1072 
1073 	if (iov_iter_alignment(iter) & blocksize_mask)
1074 		return -EINVAL;
1075 
1076 	return 0;
1077 }
1078 
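/*
 * Direct I/O entry point: inline-data files and misaligned requests return 0
 * so that the caller falls back to buffered I/O.
 */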
1079 static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1080 		struct iov_iter *iter, loff_t offset)
1081 {
1082 	struct file *file = iocb->ki_filp;
1083 	struct address_space *mapping = file->f_mapping;
1084 	struct inode *inode = mapping->host;
1085 	size_t count = iov_iter_count(iter);
1086 	int err;
1087 
1088 	/* Let buffer I/O handle the inline data case. */
1089 	if (f2fs_has_inline_data(inode))
1090 		return 0;
1091 
1092 	if (check_direct_IO(inode, rw, iter, offset))
1093 		return 0;
1094 
1095 	/* clear fsync mark to recover these blocks */
1096 	fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
1097 
1098 	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
1099 
1100 	err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
1101 	if (err < 0 && (rw & WRITE))
1102 		f2fs_write_failed(mapping, offset + count);
1103 
1104 	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
1105 
1106 	return err;
1107 }
1108 
1109 static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
1110 				      unsigned int length)
1111 {
1112 	struct inode *inode = page->mapping->host;
1113 	if (PageDirty(page))
1114 		inode_dec_dirty_dents(inode);
1115 	ClearPagePrivate(page);
1116 }
1117 
1118 static int f2fs_release_data_page(struct page *page, gfp_t wait)
1119 {
1120 	ClearPagePrivate(page);
1121 	return 1;
1122 }
1123 
1124 static int f2fs_set_data_page_dirty(struct page *page)
1125 {
1126 	struct address_space *mapping = page->mapping;
1127 	struct inode *inode = mapping->host;
1128 
1129 	trace_f2fs_set_page_dirty(page, DATA);
1130 
1131 	SetPageUptodate(page);
1132 	mark_inode_dirty(inode);
1133 
1134 	if (!PageDirty(page)) {
1135 		__set_page_dirty_nobuffers(page);
1136 		set_dirty_dir_page(inode, page);
1137 		return 1;
1138 	}
1139 	return 0;
1140 }
1141 
1142 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
1143 {
1144 	struct inode *inode = mapping->host;
1145 
1146 	if (f2fs_has_inline_data(inode))
1147 		return 0;
1148 
1149 	return generic_block_bmap(mapping, block, get_data_block);
1150 }
1151 
1152 const struct address_space_operations f2fs_dblock_aops = {
1153 	.readpage	= f2fs_read_data_page,
1154 	.readpages	= f2fs_read_data_pages,
1155 	.writepage	= f2fs_write_data_page,
1156 	.writepages	= f2fs_write_data_pages,
1157 	.write_begin	= f2fs_write_begin,
1158 	.write_end	= f2fs_write_end,
1159 	.set_page_dirty	= f2fs_set_data_page_dirty,
1160 	.invalidatepage	= f2fs_invalidate_data_page,
1161 	.releasepage	= f2fs_release_data_page,
1162 	.direct_IO	= f2fs_direct_IO,
1163 	.bmap		= f2fs_bmap,
1164 };
1165