1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * fs/f2fs/file.c
4 *
5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6 * http://www.samsung.com/
7 */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/stat.h>
11 #include <linux/writeback.h>
12 #include <linux/blkdev.h>
13 #include <linux/falloc.h>
14 #include <linux/types.h>
15 #include <linux/compat.h>
16 #include <linux/uaccess.h>
17 #include <linux/mount.h>
18 #include <linux/pagevec.h>
19 #include <linux/uio.h>
20 #include <linux/uuid.h>
21 #include <linux/file.h>
22 #include <linux/nls.h>
23 #include <linux/sched/signal.h>
24 #include <linux/fileattr.h>
25 #include <linux/fadvise.h>
26 #include <linux/iomap.h>
27
28 #include "f2fs.h"
29 #include "node.h"
30 #include "segment.h"
31 #include "xattr.h"
32 #include "acl.h"
33 #include "gc.h"
34 #include "iostat.h"
35 #include <trace/events/f2fs.h>
36 #include <uapi/linux/f2fs.h>
37
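/*
 * Before extending i_size, zero or drop cached folios between the old and
 * new EOF so stale post-EOF page cache contents are never exposed.
 */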
38 static void f2fs_zero_post_eof_page(struct inode *inode,
39 loff_t new_size, bool lock)
40 {
41 loff_t old_size = i_size_read(inode);
42
43 if (old_size >= new_size)
44 return;
45
46 if (mapping_empty(inode->i_mapping))
47 return;
48
49 if (lock)
50 filemap_invalidate_lock(inode->i_mapping);
51 /* zero or drop pages only in the range [old_size, new_size] */
52 truncate_inode_pages_range(inode->i_mapping, old_size, new_size);
53 if (lock)
54 filemap_invalidate_unlock(inode->i_mapping);
55 }
56
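/* Read fault handler: account a mapped read to iostat on a successful fault. */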
57 static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
58 {
59 struct inode *inode = file_inode(vmf->vma->vm_file);
60 vm_flags_t flags = vmf->vma->vm_flags;
61 vm_fault_t ret;
62
63 ret = filemap_fault(vmf);
64 if (ret & VM_FAULT_LOCKED)
65 f2fs_update_iostat(F2FS_I_SB(inode), inode,
66 APP_MAPPED_READ_IO, F2FS_BLKSIZE);
67
68 trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret);
69
70 return ret;
71 }
72
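/*
 * Write fault handler: allocate the backing block when needed (not for
 * pinned files or already-compressed clusters), zero any part of the folio
 * beyond EOF, and dirty the folio.
 */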
73 static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
74 {
75 struct folio *folio = page_folio(vmf->page);
76 struct inode *inode = file_inode(vmf->vma->vm_file);
77 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
78 struct dnode_of_data dn;
79 bool need_alloc = !f2fs_is_pinned_file(inode);
80 int err = 0;
81 vm_fault_t ret;
82
83 if (unlikely(IS_IMMUTABLE(inode)))
84 return VM_FAULT_SIGBUS;
85
86 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
87 err = -EIO;
88 goto out;
89 }
90
91 if (unlikely(f2fs_cp_error(sbi))) {
92 err = -EIO;
93 goto out;
94 }
95
96 if (!f2fs_is_checkpoint_ready(sbi)) {
97 err = -ENOSPC;
98 goto out;
99 }
100
101 err = f2fs_convert_inline_inode(inode);
102 if (err)
103 goto out;
104
105 #ifdef CONFIG_F2FS_FS_COMPRESSION
106 if (f2fs_compressed_file(inode)) {
107 int ret = f2fs_is_compressed_cluster(inode, folio->index);
108
109 if (ret < 0) {
110 err = ret;
111 goto out;
112 } else if (ret) {
113 need_alloc = false;
114 }
115 }
116 #endif
117 /* should be done outside of any locked page */
118 if (need_alloc)
119 f2fs_balance_fs(sbi, true);
120
121 sb_start_pagefault(inode->i_sb);
122
123 f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
124
125 f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT, true);
126
127 file_update_time(vmf->vma->vm_file);
128 filemap_invalidate_lock_shared(inode->i_mapping);
129
130 folio_lock(folio);
131 if (unlikely(folio->mapping != inode->i_mapping ||
132 folio_pos(folio) > i_size_read(inode) ||
133 !folio_test_uptodate(folio))) {
134 folio_unlock(folio);
135 err = -EFAULT;
136 goto out_sem;
137 }
138
139 set_new_dnode(&dn, inode, NULL, NULL, 0);
140 if (need_alloc) {
141 /* block allocation */
142 err = f2fs_get_block_locked(&dn, folio->index);
143 } else {
144 err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE);
145 f2fs_put_dnode(&dn);
146 if (f2fs_is_pinned_file(inode) &&
147 !__is_valid_data_blkaddr(dn.data_blkaddr))
148 err = -EIO;
149 }
150
151 if (err) {
152 folio_unlock(folio);
153 goto out_sem;
154 }
155
156 f2fs_folio_wait_writeback(folio, DATA, false, true);
157
158 /* wait for GCed page writeback via META_MAPPING */
159 f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
160
161 /*
162 * check to see if the page is mapped already (no holes)
163 */
164 if (folio_test_mappedtodisk(folio))
165 goto out_sem;
166
167 /* page is wholly or partially inside EOF */
168 if (((loff_t)(folio->index + 1) << PAGE_SHIFT) >
169 i_size_read(inode)) {
170 loff_t offset;
171
172 offset = i_size_read(inode) & ~PAGE_MASK;
173 folio_zero_segment(folio, offset, folio_size(folio));
174 }
175 folio_mark_dirty(folio);
176
177 f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE);
178 f2fs_update_time(sbi, REQ_TIME);
179
180 out_sem:
181 filemap_invalidate_unlock_shared(inode->i_mapping);
182
183 sb_end_pagefault(inode->i_sb);
184 out:
185 ret = vmf_fs_error(err);
186
187 trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret);
188 return ret;
189 }
190
191 static const struct vm_operations_struct f2fs_file_vm_ops = {
192 .fault = f2fs_filemap_fault,
193 .map_pages = filemap_map_pages,
194 .page_mkwrite = f2fs_vm_page_mkwrite,
195 };
196
197 static int get_parent_ino(struct inode *inode, nid_t *pino)
198 {
199 struct dentry *dentry;
200
201 /*
202 * Make sure to get the non-deleted alias. The alias associated with
203 * the open file descriptor being fsync()'ed may be deleted already.
204 */
205 dentry = d_find_alias(inode);
206 if (!dentry)
207 return 0;
208
209 *pino = d_parent_ino(dentry);
210 dput(dentry);
211 return 1;
212 }
213
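/*
 * Decide whether fsync must fall back to a full checkpoint instead of
 * relying on roll-forward recovery, and return the reason.
 */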
214 static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
215 {
216 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
217 enum cp_reason_type cp_reason = CP_NO_NEEDED;
218
219 if (!S_ISREG(inode->i_mode))
220 cp_reason = CP_NON_REGULAR;
221 else if (f2fs_compressed_file(inode))
222 cp_reason = CP_COMPRESSED;
223 else if (inode->i_nlink != 1)
224 cp_reason = CP_HARDLINK;
225 else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
226 cp_reason = CP_SB_NEED_CP;
227 else if (file_wrong_pino(inode))
228 cp_reason = CP_WRONG_PINO;
229 else if (!f2fs_space_for_roll_forward(sbi))
230 cp_reason = CP_NO_SPC_ROLL;
231 else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
232 cp_reason = CP_NODE_NEED_CP;
233 else if (test_opt(sbi, FASTBOOT))
234 cp_reason = CP_FASTBOOT_MODE;
235 else if (F2FS_OPTION(sbi).active_logs == 2)
236 cp_reason = CP_SPEC_LOG_NUM;
237 else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT &&
238 f2fs_need_dentry_mark(sbi, inode->i_ino) &&
239 f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
240 TRANS_DIR_INO))
241 cp_reason = CP_RECOVER_DIR;
242 else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
243 XATTR_DIR_INO))
244 cp_reason = CP_XATTR_DIR;
245
246 return cp_reason;
247 }
248
249 static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
250 {
251 struct folio *i = filemap_get_folio(NODE_MAPPING(sbi), ino);
252 bool ret = false;
253 /* but we need to catch any pending inode updates */
254 if ((!IS_ERR(i) && folio_test_dirty(i)) ||
255 f2fs_need_inode_block_update(sbi, ino))
256 ret = true;
257 f2fs_folio_put(i, false);
258 return ret;
259 }
260
261 static void try_to_fix_pino(struct inode *inode)
262 {
263 struct f2fs_inode_info *fi = F2FS_I(inode);
264 nid_t pino;
265
266 f2fs_down_write(&fi->i_sem);
267 if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
268 get_parent_ino(inode, &pino)) {
269 f2fs_i_pino_write(inode, pino);
270 file_got_pino(inode);
271 }
272 f2fs_up_write(&fi->i_sem);
273 }
274
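/*
 * Core fsync path: write back data first, then either trigger a checkpoint
 * (per need_do_checkpoint()) or persist the node chain, and finally issue
 * a cache flush when required.
 */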
275 static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
276 int datasync, bool atomic)
277 {
278 struct inode *inode = file->f_mapping->host;
279 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
280 nid_t ino = inode->i_ino;
281 int ret = 0;
282 enum cp_reason_type cp_reason = 0;
283 struct writeback_control wbc = {
284 .sync_mode = WB_SYNC_ALL,
285 .nr_to_write = LONG_MAX,
286 };
287 unsigned int seq_id = 0;
288
289 if (unlikely(f2fs_readonly(inode->i_sb)))
290 return 0;
291
292 trace_f2fs_sync_file_enter(inode);
293
294 if (S_ISDIR(inode->i_mode))
295 goto go_write;
296
297 /* if fdatasync is triggered, let's do in-place-update */
298 if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
299 set_inode_flag(inode, FI_NEED_IPU);
300 ret = file_write_and_wait_range(file, start, end);
301 clear_inode_flag(inode, FI_NEED_IPU);
302
303 if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
304 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
305 return ret;
306 }
307
308 /* if the inode is dirty, let's recover all the time */
309 if (!f2fs_skip_inode_update(inode, datasync)) {
310 f2fs_write_inode(inode, NULL);
311 goto go_write;
312 }
313
314 /*
315 * if there is no written data, don't waste time writing recovery info.
316 */
317 if (!is_inode_flag_set(inode, FI_APPEND_WRITE) &&
318 !f2fs_exist_written_data(sbi, ino, APPEND_INO)) {
319
320 /* it may call write_inode just prior to fsync */
321 if (need_inode_page_update(sbi, ino))
322 goto go_write;
323
324 if (is_inode_flag_set(inode, FI_UPDATE_WRITE) ||
325 f2fs_exist_written_data(sbi, ino, UPDATE_INO))
326 goto flush_out;
327 goto out;
328 } else {
329 /*
330 * in the OPU case, during fsync() the node block can be persisted
331 * before the data when the lower device doesn't support write
332 * barriers, resulting in data corruption after sudden power-off.
333 * So for strict fsync mode, force atomic write semantics to keep
334 * the write order between data/node and the last node, avoiding
335 * potential data corruption.
336 */
337 if (F2FS_OPTION(sbi).fsync_mode ==
338 FSYNC_MODE_STRICT && !atomic)
339 atomic = true;
340 }
341 go_write:
342 /*
343 * Both fdatasync() and fsync() can be recovered from a sudden
344 * power-off.
345 */
346 f2fs_down_read(&F2FS_I(inode)->i_sem);
347 cp_reason = need_do_checkpoint(inode);
348 f2fs_up_read(&F2FS_I(inode)->i_sem);
349
350 if (cp_reason) {
351 /* all the dirty node pages should be flushed for POR */
352 ret = f2fs_sync_fs(inode->i_sb, 1);
353
354 /*
355 * We've secured consistency through sync_fs. Following pino
356 * will be used only for fsynced inodes after checkpoint.
357 */
358 try_to_fix_pino(inode);
359 clear_inode_flag(inode, FI_APPEND_WRITE);
360 clear_inode_flag(inode, FI_UPDATE_WRITE);
361 goto out;
362 }
363 sync_nodes:
364 atomic_inc(&sbi->wb_sync_req[NODE]);
365 ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
366 atomic_dec(&sbi->wb_sync_req[NODE]);
367 if (ret)
368 goto out;
369
370 /* if cp_error was enabled, we should avoid infinite loop */
371 if (unlikely(f2fs_cp_error(sbi))) {
372 ret = -EIO;
373 goto out;
374 }
375
376 if (f2fs_need_inode_block_update(sbi, ino)) {
377 f2fs_mark_inode_dirty_sync(inode, true);
378 f2fs_write_inode(inode, NULL);
379 goto sync_nodes;
380 }
381
382 /*
383 * If it's an atomic write, keeping the write ordering is enough, so
384 * we don't need to wait for node write completion here: the node
385 * chain serializes node blocks. If any node write is reordered, we
386 * simply see a broken chain and roll-forward recovery stops there,
387 * meaning we recover either all or none of the node blocks covered
388 * by the fsync mark.
389 */
390 if (!atomic) {
391 ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
392 if (ret)
393 goto out;
394 }
395
396 /* once recovery info is written, we don't need to track this */
397 f2fs_remove_ino_entry(sbi, ino, APPEND_INO);
398 clear_inode_flag(inode, FI_APPEND_WRITE);
399 flush_out:
400 if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
401 ret = f2fs_issue_flush(sbi, inode->i_ino);
402 if (!ret) {
403 f2fs_remove_ino_entry(sbi, ino, UPDATE_INO);
404 clear_inode_flag(inode, FI_UPDATE_WRITE);
405 f2fs_remove_ino_entry(sbi, ino, FLUSH_INO);
406 }
407 f2fs_update_time(sbi, REQ_TIME);
408 out:
409 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
410 return ret;
411 }
412
413 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
414 {
415 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
416 return -EIO;
417 return f2fs_do_sync_file(file, start, end, datasync, false);
418 }
419
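/* Check whether the block at @index satisfies a SEEK_DATA/SEEK_HOLE query. */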
420 static bool __found_offset(struct address_space *mapping,
421 struct dnode_of_data *dn, pgoff_t index, int whence)
422 {
423 block_t blkaddr = f2fs_data_blkaddr(dn);
424 struct inode *inode = mapping->host;
425 bool compressed_cluster = false;
426
427 if (f2fs_compressed_file(inode)) {
428 block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_folio,
429 ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size));
430
431 compressed_cluster = first_blkaddr == COMPRESS_ADDR;
432 }
433
434 switch (whence) {
435 case SEEK_DATA:
436 if (__is_valid_data_blkaddr(blkaddr))
437 return true;
438 if (blkaddr == NEW_ADDR &&
439 xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY))
440 return true;
441 if (compressed_cluster)
442 return true;
443 break;
444 case SEEK_HOLE:
445 if (compressed_cluster)
446 return false;
447 if (blkaddr == NULL_ADDR)
448 return true;
449 break;
450 }
451 return false;
452 }
453
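/* Walk dnode blocks to find the next data or hole offset for llseek(). */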
454 static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
455 {
456 struct inode *inode = file->f_mapping->host;
457 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
458 struct dnode_of_data dn;
459 pgoff_t pgofs, end_offset;
460 loff_t data_ofs = offset;
461 loff_t isize;
462 int err = 0;
463
464 inode_lock_shared(inode);
465
466 isize = i_size_read(inode);
467 if (offset >= isize)
468 goto fail;
469
470 /* handle inline data case */
471 if (f2fs_has_inline_data(inode)) {
472 if (whence == SEEK_HOLE) {
473 data_ofs = isize;
474 goto found;
475 } else if (whence == SEEK_DATA) {
476 data_ofs = offset;
477 goto found;
478 }
479 }
480
481 pgofs = (pgoff_t)(offset >> PAGE_SHIFT);
482
483 for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
484 set_new_dnode(&dn, inode, NULL, NULL, 0);
485 err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
486 if (err && err != -ENOENT) {
487 goto fail;
488 } else if (err == -ENOENT) {
489 /* direct node does not exist */
490 if (whence == SEEK_DATA) {
491 pgofs = f2fs_get_next_page_offset(&dn, pgofs);
492 continue;
493 } else {
494 goto found;
495 }
496 }
497
498 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
499
500 /* find data/hole in dnode block */
501 for (; dn.ofs_in_node < end_offset;
502 dn.ofs_in_node++, pgofs++,
503 data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
504 block_t blkaddr;
505
506 blkaddr = f2fs_data_blkaddr(&dn);
507
508 if (__is_valid_data_blkaddr(blkaddr) &&
509 !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
510 blkaddr, DATA_GENERIC_ENHANCE)) {
511 f2fs_put_dnode(&dn);
512 goto fail;
513 }
514
515 if (__found_offset(file->f_mapping, &dn,
516 pgofs, whence)) {
517 f2fs_put_dnode(&dn);
518 goto found;
519 }
520 }
521 f2fs_put_dnode(&dn);
522 }
523
524 if (whence == SEEK_DATA)
525 goto fail;
526 found:
527 if (whence == SEEK_HOLE && data_ofs > isize)
528 data_ofs = isize;
529 inode_unlock_shared(inode);
530 return vfs_setpos(file, data_ofs, maxbytes);
531 fail:
532 inode_unlock_shared(inode);
533 return -ENXIO;
534 }
535
536 static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
537 {
538 struct inode *inode = file->f_mapping->host;
539 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
540
541 switch (whence) {
542 case SEEK_SET:
543 case SEEK_CUR:
544 case SEEK_END:
545 return generic_file_llseek_size(file, offset, whence,
546 maxbytes, i_size_read(inode));
547 case SEEK_DATA:
548 case SEEK_HOLE:
549 if (offset < 0)
550 return -ENXIO;
551 return f2fs_seek_block(file, offset, whence);
552 }
553
554 return -EINVAL;
555 }
556
557 static int f2fs_file_mmap_prepare(struct vm_area_desc *desc)
558 {
559 struct file *file = desc->file;
560 struct inode *inode = file_inode(file);
561
562 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
563 return -EIO;
564
565 if (!f2fs_is_compress_backend_ready(inode))
566 return -EOPNOTSUPP;
567
568 file_accessed(file);
569 desc->vm_ops = &f2fs_file_vm_ops;
570
571 f2fs_down_read(&F2FS_I(inode)->i_sem);
572 set_inode_flag(inode, FI_MMAP_FILE);
573 f2fs_up_read(&F2FS_I(inode)->i_sem);
574
575 return 0;
576 }
577
578 static int finish_preallocate_blocks(struct inode *inode)
579 {
580 int ret = 0;
581 bool opened;
582
583 f2fs_down_read(&F2FS_I(inode)->i_sem);
584 opened = is_inode_flag_set(inode, FI_OPENED_FILE);
585 f2fs_up_read(&F2FS_I(inode)->i_sem);
586 if (opened)
587 return 0;
588
589 inode_lock(inode);
590 if (is_inode_flag_set(inode, FI_OPENED_FILE))
591 goto out_unlock;
592
593 if (!file_should_truncate(inode))
594 goto out_update;
595
596 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
597 filemap_invalidate_lock(inode->i_mapping);
598
599 truncate_setsize(inode, i_size_read(inode));
600 ret = f2fs_truncate(inode);
601
602 filemap_invalidate_unlock(inode->i_mapping);
603 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
604 if (ret)
605 goto out_unlock;
606
607 file_dont_truncate(inode);
608 out_update:
609 f2fs_down_write(&F2FS_I(inode)->i_sem);
610 set_inode_flag(inode, FI_OPENED_FILE);
611 f2fs_up_write(&F2FS_I(inode)->i_sem);
612 out_unlock:
613 inode_unlock(inode);
614 return ret;
615 }
616
617 static int f2fs_file_open(struct inode *inode, struct file *filp)
618 {
619 int err = fscrypt_file_open(inode, filp);
620
621 if (err)
622 return err;
623
624 if (!f2fs_is_compress_backend_ready(inode))
625 return -EOPNOTSUPP;
626
627 err = fsverity_file_open(inode, filp);
628 if (err)
629 return err;
630
631 filp->f_mode |= FMODE_NOWAIT;
632 filp->f_mode |= FMODE_CAN_ODIRECT;
633
634 err = dquot_file_open(inode, filp);
635 if (err)
636 return err;
637
638 err = finish_preallocate_blocks(inode);
639 if (!err)
640 atomic_inc(&F2FS_I(inode)->open_count);
641 return err;
642 }
643
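/*
 * Invalidate up to @count block addresses starting at dn->ofs_in_node,
 * batching physically contiguous runs, and adjust compressed-block,
 * extent cache and valid block accounting.
 */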
644 void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
645 {
646 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
647 int nr_free = 0, ofs = dn->ofs_in_node, len = count;
648 __le32 *addr;
649 bool compressed_cluster = false;
650 int cluster_index = 0, valid_blocks = 0;
651 int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
652 bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks);
653 block_t blkstart;
654 int blklen = 0;
655
656 addr = get_dnode_addr(dn->inode, dn->node_folio) + ofs;
657 blkstart = le32_to_cpu(*addr);
658
659 /* Assumption: truncation starts with cluster */
660 for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) {
661 block_t blkaddr = le32_to_cpu(*addr);
662
663 if (f2fs_compressed_file(dn->inode) &&
664 !(cluster_index & (cluster_size - 1))) {
665 if (compressed_cluster)
666 f2fs_i_compr_blocks_update(dn->inode,
667 valid_blocks, false);
668 compressed_cluster = (blkaddr == COMPRESS_ADDR);
669 valid_blocks = 0;
670 }
671
672 if (blkaddr == NULL_ADDR)
673 goto next;
674
675 f2fs_set_data_blkaddr(dn, NULL_ADDR);
676
677 if (__is_valid_data_blkaddr(blkaddr)) {
678 if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
679 goto next;
680 if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
681 DATA_GENERIC_ENHANCE))
682 goto next;
683 if (compressed_cluster)
684 valid_blocks++;
685 }
686
687 if (blkstart + blklen == blkaddr) {
688 blklen++;
689 } else {
690 f2fs_invalidate_blocks(sbi, blkstart, blklen);
691 blkstart = blkaddr;
692 blklen = 1;
693 }
694
695 if (!released || blkaddr != COMPRESS_ADDR)
696 nr_free++;
697
698 continue;
699
700 next:
701 if (blklen)
702 f2fs_invalidate_blocks(sbi, blkstart, blklen);
703
704 blkstart = le32_to_cpu(*(addr + 1));
705 blklen = 0;
706 }
707
708 if (blklen)
709 f2fs_invalidate_blocks(sbi, blkstart, blklen);
710
711 if (compressed_cluster)
712 f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false);
713
714 if (nr_free) {
715 pgoff_t fofs;
716 /*
717 * once we invalidate valid blkaddr in range [ofs, ofs + count],
718 * we will invalidate all blkaddr in the whole range.
719 */
720 fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio),
721 dn->inode) + ofs;
722 f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
723 f2fs_update_age_extent_cache_range(dn, fofs, len);
724 dec_valid_block_count(sbi, dn->inode, nr_free);
725 }
726 dn->ofs_in_node = ofs;
727
728 f2fs_update_time(sbi, REQ_TIME);
729 trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
730 dn->ofs_in_node, nr_free);
731 }
732
733 static int truncate_partial_data_page(struct inode *inode, u64 from,
734 bool cache_only)
735 {
736 loff_t offset = from & (PAGE_SIZE - 1);
737 pgoff_t index = from >> PAGE_SHIFT;
738 struct address_space *mapping = inode->i_mapping;
739 struct folio *folio;
740
741 if (!offset && !cache_only)
742 return 0;
743
744 if (cache_only) {
745 folio = filemap_lock_folio(mapping, index);
746 if (IS_ERR(folio))
747 return 0;
748 if (folio_test_uptodate(folio))
749 goto truncate_out;
750 f2fs_folio_put(folio, true);
751 return 0;
752 }
753
754 folio = f2fs_get_lock_data_folio(inode, index, true);
755 if (IS_ERR(folio))
756 return PTR_ERR(folio) == -ENOENT ? 0 : PTR_ERR(folio);
757 truncate_out:
758 f2fs_folio_wait_writeback(folio, DATA, true, true);
759 folio_zero_segment(folio, offset, folio_size(folio));
760
761 /* An encrypted inode must have its key available to truncate the last page. */
762 f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode));
763 if (!cache_only)
764 folio_mark_dirty(folio);
765 f2fs_folio_put(folio, true);
766 return 0;
767 }
768
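/*
 * Truncate blocks beyond @from: handle device-aliasing and inline inodes,
 * free the tail of the covering dnode plus all following node blocks, and
 * finally zero the partial page at @from.
 */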
769 int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
770 {
771 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
772 struct dnode_of_data dn;
773 pgoff_t free_from;
774 int count = 0, err = 0;
775 struct folio *ifolio;
776 bool truncate_page = false;
777
778 trace_f2fs_truncate_blocks_enter(inode, from);
779
780 if (IS_DEVICE_ALIASING(inode) && from) {
781 err = -EINVAL;
782 goto out_err;
783 }
784
785 free_from = (pgoff_t)F2FS_BLK_ALIGN(from);
786
787 if (free_from >= max_file_blocks(inode))
788 goto free_partial;
789
790 if (lock)
791 f2fs_lock_op(sbi);
792
793 ifolio = f2fs_get_inode_folio(sbi, inode->i_ino);
794 if (IS_ERR(ifolio)) {
795 err = PTR_ERR(ifolio);
796 goto out;
797 }
798
799 if (IS_DEVICE_ALIASING(inode)) {
800 struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
801 struct extent_info ei = et->largest;
802
803 f2fs_invalidate_blocks(sbi, ei.blk, ei.len);
804
805 dec_valid_block_count(sbi, inode, ei.len);
806 f2fs_update_time(sbi, REQ_TIME);
807
808 f2fs_folio_put(ifolio, true);
809 goto out;
810 }
811
812 if (f2fs_has_inline_data(inode)) {
813 f2fs_truncate_inline_inode(inode, ifolio, from);
814 f2fs_folio_put(ifolio, true);
815 truncate_page = true;
816 goto out;
817 }
818
819 set_new_dnode(&dn, inode, ifolio, NULL, 0);
820 err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
821 if (err) {
822 if (err == -ENOENT)
823 goto free_next;
824 goto out;
825 }
826
827 count = ADDRS_PER_PAGE(dn.node_folio, inode);
828
829 count -= dn.ofs_in_node;
830 f2fs_bug_on(sbi, count < 0);
831
832 if (dn.ofs_in_node || IS_INODE(dn.node_folio)) {
833 f2fs_truncate_data_blocks_range(&dn, count);
834 free_from += count;
835 }
836
837 f2fs_put_dnode(&dn);
838 free_next:
839 err = f2fs_truncate_inode_blocks(inode, free_from);
840 out:
841 if (lock)
842 f2fs_unlock_op(sbi);
843 free_partial:
844 /* lastly zero out the first data page */
845 if (!err)
846 err = truncate_partial_data_page(inode, from, truncate_page);
847 out_err:
848 trace_f2fs_truncate_blocks_exit(inode, err);
849 return err;
850 }
851
852 int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
853 {
854 u64 free_from = from;
855 int err;
856
857 #ifdef CONFIG_F2FS_FS_COMPRESSION
858 /*
859 * for compressed files, only cluster-size-aligned
860 * truncation is supported.
861 */
862 if (f2fs_compressed_file(inode))
863 free_from = round_up(from,
864 F2FS_I(inode)->i_cluster_size << PAGE_SHIFT);
865 #endif
866
867 err = f2fs_do_truncate_blocks(inode, free_from, lock);
868 if (err)
869 return err;
870
871 #ifdef CONFIG_F2FS_FS_COMPRESSION
872 /*
873 * For compressed files, direct writes are disallowed after compressed
874 * blocks are released, but are allowed again after truncation to zero.
875 */
876 if (f2fs_compressed_file(inode) && !free_from
877 && is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
878 clear_inode_flag(inode, FI_COMPRESS_RELEASED);
879
880 if (from != free_from) {
881 err = f2fs_truncate_partial_cluster(inode, from, lock);
882 if (err)
883 return err;
884 }
885 #endif
886
887 return 0;
888 }
889
890 int f2fs_truncate(struct inode *inode)
891 {
892 int err;
893
894 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
895 return -EIO;
896
897 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
898 S_ISLNK(inode->i_mode)))
899 return 0;
900
901 trace_f2fs_truncate(inode);
902
903 if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE))
904 return -EIO;
905
906 err = f2fs_dquot_initialize(inode);
907 if (err)
908 return err;
909
910 /* we should check inline_data size */
911 if (!f2fs_may_inline_data(inode)) {
912 err = f2fs_convert_inline_inode(inode);
913 if (err) {
914 /*
915 * Always truncate page #0 to avoid page cache
916 * leak in evict() path.
917 */
918 truncate_inode_pages_range(inode->i_mapping,
919 F2FS_BLK_TO_BYTES(0),
920 F2FS_BLK_END_BYTES(0));
921 return err;
922 }
923 }
924
925 err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
926 if (err)
927 return err;
928
929 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
930 f2fs_mark_inode_dirty_sync(inode, false);
931 return 0;
932 }
933
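/* Return true if direct I/O on this inode must fall back to buffered I/O. */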
934 static bool f2fs_force_buffered_io(struct inode *inode, int rw)
935 {
936 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
937
938 if (!fscrypt_dio_supported(inode))
939 return true;
940 if (fsverity_active(inode))
941 return true;
942 if (f2fs_compressed_file(inode))
943 return true;
944 /*
945 * only force direct reads to use buffered IO; direct writes expect
946 * inline data conversion before committing IO.
947 */
948 if (f2fs_has_inline_data(inode) && rw == READ)
949 return true;
950
951 /* disallow direct IO if any device has an unaligned blksize */
952 if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
953 return true;
954 /*
955 * for zoned block devices, fall back direct IO to buffered IO so
956 * all IOs can be serialized by log-structured writes.
957 */
958 if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) &&
959 !f2fs_is_pinned_file(inode))
960 return true;
961 if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
962 return true;
963
964 return false;
965 }
966
967 int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path,
968 struct kstat *stat, u32 request_mask, unsigned int query_flags)
969 {
970 struct inode *inode = d_inode(path->dentry);
971 struct f2fs_inode_info *fi = F2FS_I(inode);
972 struct f2fs_inode *ri = NULL;
973 unsigned int flags;
974
975 if (f2fs_has_extra_attr(inode) &&
976 f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
977 F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
978 stat->result_mask |= STATX_BTIME;
979 stat->btime.tv_sec = fi->i_crtime.tv_sec;
980 stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
981 }
982
983 /*
984 * Return the DIO alignment restrictions if requested. We only return
985 * this information when requested, since on encrypted files it might
986 * take a fair bit of work to get if the file wasn't opened recently.
987 *
988 * f2fs sometimes supports DIO reads but not DIO writes. STATX_DIOALIGN
989 * cannot represent that, so in that case we report no DIO support.
990 */
991 if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
992 unsigned int bsize = i_blocksize(inode);
993
994 stat->result_mask |= STATX_DIOALIGN;
995 if (!f2fs_force_buffered_io(inode, WRITE)) {
996 stat->dio_mem_align = bsize;
997 stat->dio_offset_align = bsize;
998 }
999 }
1000
1001 flags = fi->i_flags;
1002 if (flags & F2FS_COMPR_FL)
1003 stat->attributes |= STATX_ATTR_COMPRESSED;
1004 if (flags & F2FS_APPEND_FL)
1005 stat->attributes |= STATX_ATTR_APPEND;
1006 if (IS_ENCRYPTED(inode))
1007 stat->attributes |= STATX_ATTR_ENCRYPTED;
1008 if (flags & F2FS_IMMUTABLE_FL)
1009 stat->attributes |= STATX_ATTR_IMMUTABLE;
1010 if (flags & F2FS_NODUMP_FL)
1011 stat->attributes |= STATX_ATTR_NODUMP;
1012 if (IS_VERITY(inode))
1013 stat->attributes |= STATX_ATTR_VERITY;
1014
1015 stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
1016 STATX_ATTR_APPEND |
1017 STATX_ATTR_ENCRYPTED |
1018 STATX_ATTR_IMMUTABLE |
1019 STATX_ATTR_NODUMP |
1020 STATX_ATTR_VERITY);
1021
1022 generic_fillattr(idmap, request_mask, inode, stat);
1023
1024 /* we need to show initial sectors used for inline_data/dentries */
1025 if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
1026 f2fs_has_inline_dentry(inode))
1027 stat->blocks += (stat->size + 511) >> 9;
1028
1029 return 0;
1030 }
1031
1032 #ifdef CONFIG_F2FS_FS_POSIX_ACL
1033 static void __setattr_copy(struct mnt_idmap *idmap,
1034 struct inode *inode, const struct iattr *attr)
1035 {
1036 unsigned int ia_valid = attr->ia_valid;
1037
1038 i_uid_update(idmap, attr, inode);
1039 i_gid_update(idmap, attr, inode);
1040 if (ia_valid & ATTR_ATIME)
1041 inode_set_atime_to_ts(inode, attr->ia_atime);
1042 if (ia_valid & ATTR_MTIME)
1043 inode_set_mtime_to_ts(inode, attr->ia_mtime);
1044 if (ia_valid & ATTR_CTIME)
1045 inode_set_ctime_to_ts(inode, attr->ia_ctime);
1046 if (ia_valid & ATTR_MODE) {
1047 umode_t mode = attr->ia_mode;
1048
1049 if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
1050 mode &= ~S_ISGID;
1051 set_acl_inode(inode, mode);
1052 }
1053 }
1054 #else
1055 #define __setattr_copy setattr_copy
1056 #endif
1057
1058 int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
1059 struct iattr *attr)
1060 {
1061 struct inode *inode = d_inode(dentry);
1062 struct f2fs_inode_info *fi = F2FS_I(inode);
1063 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1064 int err;
1065
1066 if (unlikely(f2fs_cp_error(sbi)))
1067 return -EIO;
1068
1069 err = setattr_prepare(idmap, dentry, attr);
1070 if (err)
1071 return err;
1072
1073 err = fscrypt_prepare_setattr(dentry, attr);
1074 if (err)
1075 return err;
1076
1077 err = fsverity_prepare_setattr(dentry, attr);
1078 if (err)
1079 return err;
1080
1081 if (unlikely(IS_IMMUTABLE(inode)))
1082 return -EPERM;
1083
1084 if (unlikely(IS_APPEND(inode) &&
1085 (attr->ia_valid & (ATTR_MODE | ATTR_UID |
1086 ATTR_GID | ATTR_TIMES_SET))))
1087 return -EPERM;
1088
1089 if ((attr->ia_valid & ATTR_SIZE)) {
1090 if (!f2fs_is_compress_backend_ready(inode) ||
1091 IS_DEVICE_ALIASING(inode))
1092 return -EOPNOTSUPP;
1093 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) &&
1094 !IS_ALIGNED(attr->ia_size,
1095 F2FS_BLK_TO_BYTES(fi->i_cluster_size)))
1096 return -EINVAL;
1097 /*
1098 * To prevent scattered pin block generation, we don't allow
1099 * unaligned truncation to a smaller or equal size for pinned files.
1100 * We only support overwrite IO to pinned files, so larger-size
1101 * truncation doesn't matter.
1102 */
1103 if (f2fs_is_pinned_file(inode) &&
1104 attr->ia_size <= i_size_read(inode) &&
1105 !IS_ALIGNED(attr->ia_size,
1106 F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi))))
1107 return -EINVAL;
1108 }
1109
1110 if (is_quota_modification(idmap, inode, attr)) {
1111 err = f2fs_dquot_initialize(inode);
1112 if (err)
1113 return err;
1114 }
1115 if (i_uid_needs_update(idmap, attr, inode) ||
1116 i_gid_needs_update(idmap, attr, inode)) {
1117 f2fs_lock_op(sbi);
1118 err = dquot_transfer(idmap, inode, attr);
1119 if (err) {
1120 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
1121 f2fs_unlock_op(sbi);
1122 return err;
1123 }
1124 /*
1125 * update uid/gid under lock_op(), so that dquot and inode can
1126 * be updated atomically.
1127 */
1128 i_uid_update(idmap, attr, inode);
1129 i_gid_update(idmap, attr, inode);
1130 f2fs_mark_inode_dirty_sync(inode, true);
1131 f2fs_unlock_op(sbi);
1132 }
1133
1134 if (attr->ia_valid & ATTR_SIZE) {
1135 loff_t old_size = i_size_read(inode);
1136
1137 if (attr->ia_size > MAX_INLINE_DATA(inode)) {
1138 /*
1139 * convert the inline inode before i_size_write() so that i_size
1140 * never exceeds the inline_data size while the inline flag is set.
1141 */
1142 err = f2fs_convert_inline_inode(inode);
1143 if (err)
1144 return err;
1145 }
1146
1147 /*
1148 * wait for inflight dio, blocks should be removed after
1149 * IO completion.
1150 */
1151 if (attr->ia_size < old_size)
1152 inode_dio_wait(inode);
1153
1154 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
1155 filemap_invalidate_lock(inode->i_mapping);
1156
1157 if (attr->ia_size > old_size)
1158 f2fs_zero_post_eof_page(inode, attr->ia_size, false);
1159 truncate_setsize(inode, attr->ia_size);
1160
1161 if (attr->ia_size <= old_size)
1162 err = f2fs_truncate(inode);
1163 /*
1164 * do not trim all blocks after i_size if target size is
1165 * larger than i_size.
1166 */
1167 filemap_invalidate_unlock(inode->i_mapping);
1168 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
1169 if (err)
1170 return err;
1171
1172 spin_lock(&fi->i_size_lock);
1173 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
1174 fi->last_disk_size = i_size_read(inode);
1175 spin_unlock(&fi->i_size_lock);
1176 }
1177
1178 __setattr_copy(idmap, inode, attr);
1179
1180 if (attr->ia_valid & ATTR_MODE) {
1181 err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode));
1182
1183 if (is_inode_flag_set(inode, FI_ACL_MODE)) {
1184 if (!err)
1185 inode->i_mode = fi->i_acl_mode;
1186 clear_inode_flag(inode, FI_ACL_MODE);
1187 }
1188 }
1189
1190 /* file size may have changed here */
1191 f2fs_mark_inode_dirty_sync(inode, true);
1192
1193 /* inode change will produce dirty node pages flushed by checkpoint */
1194 f2fs_balance_fs(sbi, true);
1195
1196 return err;
1197 }
1198
1199 const struct inode_operations f2fs_file_inode_operations = {
1200 .getattr = f2fs_getattr,
1201 .setattr = f2fs_setattr,
1202 .get_inode_acl = f2fs_get_acl,
1203 .set_acl = f2fs_set_acl,
1204 .listxattr = f2fs_listxattr,
1205 .fiemap = f2fs_fiemap,
1206 .fileattr_get = f2fs_fileattr_get,
1207 .fileattr_set = f2fs_fileattr_set,
1208 };
1209
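/* Zero @len bytes at @start in the data page at @index, allocating it if needed. */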
1210 static int fill_zero(struct inode *inode, pgoff_t index,
1211 loff_t start, loff_t len)
1212 {
1213 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1214 struct folio *folio;
1215
1216 if (!len)
1217 return 0;
1218
1219 f2fs_balance_fs(sbi, true);
1220
1221 f2fs_lock_op(sbi);
1222 folio = f2fs_get_new_data_folio(inode, NULL, index, false);
1223 f2fs_unlock_op(sbi);
1224
1225 if (IS_ERR(folio))
1226 return PTR_ERR(folio);
1227
1228 f2fs_folio_wait_writeback(folio, DATA, true, true);
1229 folio_zero_range(folio, start, len);
1230 folio_mark_dirty(folio);
1231 f2fs_folio_put(folio, true);
1232 return 0;
1233 }
1234
1235 int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
1236 {
1237 int err;
1238
1239 while (pg_start < pg_end) {
1240 struct dnode_of_data dn;
1241 pgoff_t end_offset, count;
1242
1243 set_new_dnode(&dn, inode, NULL, NULL, 0);
1244 err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
1245 if (err) {
1246 if (err == -ENOENT) {
1247 pg_start = f2fs_get_next_page_offset(&dn,
1248 pg_start);
1249 continue;
1250 }
1251 return err;
1252 }
1253
1254 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
1255 count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);
1256
1257 f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
1258
1259 f2fs_truncate_data_blocks_range(&dn, count);
1260 f2fs_put_dnode(&dn);
1261
1262 pg_start += count;
1263 }
1264 return 0;
1265 }
1266
1267 static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
1268 {
1269 pgoff_t pg_start, pg_end;
1270 loff_t off_start, off_end;
1271 int ret;
1272
1273 ret = f2fs_convert_inline_inode(inode);
1274 if (ret)
1275 return ret;
1276
1277 f2fs_zero_post_eof_page(inode, offset + len, true);
1278
1279 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
1280 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
1281
1282 off_start = offset & (PAGE_SIZE - 1);
1283 off_end = (offset + len) & (PAGE_SIZE - 1);
1284
1285 if (pg_start == pg_end) {
1286 ret = fill_zero(inode, pg_start, off_start,
1287 off_end - off_start);
1288 if (ret)
1289 return ret;
1290 } else {
1291 if (off_start) {
1292 ret = fill_zero(inode, pg_start++, off_start,
1293 PAGE_SIZE - off_start);
1294 if (ret)
1295 return ret;
1296 }
1297 if (off_end) {
1298 ret = fill_zero(inode, pg_end, 0, off_end);
1299 if (ret)
1300 return ret;
1301 }
1302
1303 if (pg_start < pg_end) {
1304 loff_t blk_start, blk_end;
1305 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1306
1307 f2fs_balance_fs(sbi, true);
1308
1309 blk_start = (loff_t)pg_start << PAGE_SHIFT;
1310 blk_end = (loff_t)pg_end << PAGE_SHIFT;
1311
1312 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1313 filemap_invalidate_lock(inode->i_mapping);
1314
1315 truncate_pagecache_range(inode, blk_start, blk_end - 1);
1316
1317 f2fs_lock_op(sbi);
1318 ret = f2fs_truncate_hole(inode, pg_start, pg_end);
1319 f2fs_unlock_op(sbi);
1320
1321 filemap_invalidate_unlock(inode->i_mapping);
1322 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1323 }
1324 }
1325
1326 return ret;
1327 }
1328
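/*
 * Read out @len block addresses starting at @off into @blkaddr, and flag in
 * @do_replace the non-checkpointed blocks that can be moved in place.
 */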
1329 static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr,
1330 int *do_replace, pgoff_t off, pgoff_t len)
1331 {
1332 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1333 struct dnode_of_data dn;
1334 int ret, done, i;
1335
1336 next_dnode:
1337 set_new_dnode(&dn, inode, NULL, NULL, 0);
1338 ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
1339 if (ret && ret != -ENOENT) {
1340 return ret;
1341 } else if (ret == -ENOENT) {
1342 if (dn.max_level == 0)
1343 return -ENOENT;
1344 done = min((pgoff_t)ADDRS_PER_BLOCK(inode) -
1345 dn.ofs_in_node, len);
1346 blkaddr += done;
1347 do_replace += done;
1348 goto next;
1349 }
1350
1351 done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) -
1352 dn.ofs_in_node, len);
1353 for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
1354 *blkaddr = f2fs_data_blkaddr(&dn);
1355
1356 if (__is_valid_data_blkaddr(*blkaddr) &&
1357 !f2fs_is_valid_blkaddr(sbi, *blkaddr,
1358 DATA_GENERIC_ENHANCE)) {
1359 f2fs_put_dnode(&dn);
1360 return -EFSCORRUPTED;
1361 }
1362
1363 if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {
1364
1365 if (f2fs_lfs_mode(sbi)) {
1366 f2fs_put_dnode(&dn);
1367 return -EOPNOTSUPP;
1368 }
1369
1370 /* do not invalidate this block address */
1371 f2fs_update_data_blkaddr(&dn, NULL_ADDR);
1372 *do_replace = 1;
1373 }
1374 }
1375 f2fs_put_dnode(&dn);
1376 next:
1377 len -= done;
1378 off += done;
1379 if (len)
1380 goto next_dnode;
1381 return 0;
1382 }
1383
1384 static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr,
1385 int *do_replace, pgoff_t off, int len)
1386 {
1387 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1388 struct dnode_of_data dn;
1389 int ret, i;
1390
1391 for (i = 0; i < len; i++, do_replace++, blkaddr++) {
1392 if (*do_replace == 0)
1393 continue;
1394
1395 set_new_dnode(&dn, inode, NULL, NULL, 0);
1396 ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
1397 if (ret) {
1398 dec_valid_block_count(sbi, inode, 1);
1399 f2fs_invalidate_blocks(sbi, *blkaddr, 1);
1400 } else {
1401 f2fs_update_data_blkaddr(&dn, *blkaddr);
1402 }
1403 f2fs_put_dnode(&dn);
1404 }
1405 return 0;
1406 }
1407
1408 static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
1409 block_t *blkaddr, int *do_replace,
1410 pgoff_t src, pgoff_t dst, pgoff_t len, bool full)
1411 {
1412 struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode);
1413 pgoff_t i = 0;
1414 int ret;
1415
1416 while (i < len) {
1417 if (blkaddr[i] == NULL_ADDR && !full) {
1418 i++;
1419 continue;
1420 }
1421
1422 if (do_replace[i] || blkaddr[i] == NULL_ADDR) {
1423 struct dnode_of_data dn;
1424 struct node_info ni;
1425 size_t new_size;
1426 pgoff_t ilen;
1427
1428 set_new_dnode(&dn, dst_inode, NULL, NULL, 0);
1429 ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
1430 if (ret)
1431 return ret;
1432
1433 ret = f2fs_get_node_info(sbi, dn.nid, &ni, false);
1434 if (ret) {
1435 f2fs_put_dnode(&dn);
1436 return ret;
1437 }
1438
1439 ilen = min((pgoff_t)
1440 ADDRS_PER_PAGE(dn.node_folio, dst_inode) -
1441 dn.ofs_in_node, len - i);
1442 do {
1443 dn.data_blkaddr = f2fs_data_blkaddr(&dn);
1444 f2fs_truncate_data_blocks_range(&dn, 1);
1445
1446 if (do_replace[i]) {
1447 f2fs_i_blocks_write(src_inode,
1448 1, false, false);
1449 f2fs_i_blocks_write(dst_inode,
1450 1, true, false);
1451 f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
1452 blkaddr[i], ni.version, true, false);
1453
1454 do_replace[i] = 0;
1455 }
1456 dn.ofs_in_node++;
1457 i++;
1458 new_size = (loff_t)(dst + i) << PAGE_SHIFT;
1459 if (dst_inode->i_size < new_size)
1460 f2fs_i_size_write(dst_inode, new_size);
1461 } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));
1462
1463 f2fs_put_dnode(&dn);
1464 } else {
1465 struct folio *fsrc, *fdst;
1466
1467 fsrc = f2fs_get_lock_data_folio(src_inode,
1468 src + i, true);
1469 if (IS_ERR(fsrc))
1470 return PTR_ERR(fsrc);
1471 fdst = f2fs_get_new_data_folio(dst_inode, NULL, dst + i,
1472 true);
1473 if (IS_ERR(fdst)) {
1474 f2fs_folio_put(fsrc, true);
1475 return PTR_ERR(fdst);
1476 }
1477
1478 f2fs_folio_wait_writeback(fdst, DATA, true, true);
1479
1480 memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE);
1481 folio_mark_dirty(fdst);
1482 folio_set_f2fs_gcing(fdst);
1483 f2fs_folio_put(fdst, true);
1484 f2fs_folio_put(fsrc, true);
1485
1486 ret = f2fs_truncate_hole(src_inode,
1487 src + i, src + i + 1);
1488 if (ret)
1489 return ret;
1490 i++;
1491 }
1492 }
1493 return 0;
1494 }
1495
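/*
 * Move or copy @len blocks from @src to @dst in bounded batches, rolling
 * back the already saved addresses if any step fails.
 */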
1496 static int __exchange_data_block(struct inode *src_inode,
1497 struct inode *dst_inode, pgoff_t src, pgoff_t dst,
1498 pgoff_t len, bool full)
1499 {
1500 block_t *src_blkaddr;
1501 int *do_replace;
1502 pgoff_t olen;
1503 int ret;
1504
1505 while (len) {
1506 olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len);
1507
1508 src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
1509 array_size(olen, sizeof(block_t)),
1510 GFP_NOFS);
1511 if (!src_blkaddr)
1512 return -ENOMEM;
1513
1514 do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode),
1515 array_size(olen, sizeof(int)),
1516 GFP_NOFS);
1517 if (!do_replace) {
1518 kvfree(src_blkaddr);
1519 return -ENOMEM;
1520 }
1521
1522 ret = __read_out_blkaddrs(src_inode, src_blkaddr,
1523 do_replace, src, olen);
1524 if (ret)
1525 goto roll_back;
1526
1527 ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr,
1528 do_replace, src, dst, olen, full);
1529 if (ret)
1530 goto roll_back;
1531
1532 src += olen;
1533 dst += olen;
1534 len -= olen;
1535
1536 kvfree(src_blkaddr);
1537 kvfree(do_replace);
1538 }
1539 return 0;
1540
1541 roll_back:
1542 __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen);
1543 kvfree(src_blkaddr);
1544 kvfree(do_replace);
1545 return ret;
1546 }
1547
1548 static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
1549 {
1550 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1551 pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
1552 pgoff_t start = offset >> PAGE_SHIFT;
1553 pgoff_t end = (offset + len) >> PAGE_SHIFT;
1554 int ret;
1555
1556 f2fs_balance_fs(sbi, true);
1557
1558 /* avoid gc operation during block exchange */
1559 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1560 filemap_invalidate_lock(inode->i_mapping);
1561
1562 f2fs_zero_post_eof_page(inode, offset + len, false);
1563
1564 f2fs_lock_op(sbi);
1565 f2fs_drop_extent_tree(inode);
1566 truncate_pagecache(inode, offset);
1567 ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
1568 f2fs_unlock_op(sbi);
1569
1570 filemap_invalidate_unlock(inode->i_mapping);
1571 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1572 return ret;
1573 }
1574
1575 static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
1576 {
1577 loff_t new_size;
1578 int ret;
1579
1580 if (offset + len >= i_size_read(inode))
1581 return -EINVAL;
1582
1583 /* collapse range should be aligned to block size of f2fs. */
1584 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
1585 return -EINVAL;
1586
1587 ret = f2fs_convert_inline_inode(inode);
1588 if (ret)
1589 return ret;
1590
1591 /* write out all dirty pages from offset */
1592 ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
1593 if (ret)
1594 return ret;
1595
1596 ret = f2fs_do_collapse(inode, offset, len);
1597 if (ret)
1598 return ret;
1599
1600 /* write out all moved pages, if possible */
1601 filemap_invalidate_lock(inode->i_mapping);
1602 filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
1603 truncate_pagecache(inode, offset);
1604
1605 new_size = i_size_read(inode) - len;
1606 ret = f2fs_truncate_blocks(inode, new_size, true);
1607 filemap_invalidate_unlock(inode->i_mapping);
1608 if (!ret)
1609 f2fs_i_size_write(inode, new_size);
1610 return ret;
1611 }
1612
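/* Reserve missing blocks in [start, end) and convert existing ones to NEW_ADDR. */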
1613 static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
1614 pgoff_t end)
1615 {
1616 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1617 pgoff_t index = start;
1618 unsigned int ofs_in_node = dn->ofs_in_node;
1619 blkcnt_t count = 0;
1620 int ret;
1621
1622 for (; index < end; index++, dn->ofs_in_node++) {
1623 if (f2fs_data_blkaddr(dn) == NULL_ADDR)
1624 count++;
1625 }
1626
1627 dn->ofs_in_node = ofs_in_node;
1628 ret = f2fs_reserve_new_blocks(dn, count);
1629 if (ret)
1630 return ret;
1631
1632 dn->ofs_in_node = ofs_in_node;
1633 for (index = start; index < end; index++, dn->ofs_in_node++) {
1634 dn->data_blkaddr = f2fs_data_blkaddr(dn);
1635 /*
1636 * f2fs_reserve_new_blocks will not guarantee entire block
1637 * allocation.
1638 */
1639 if (dn->data_blkaddr == NULL_ADDR) {
1640 ret = -ENOSPC;
1641 break;
1642 }
1643
1644 if (dn->data_blkaddr == NEW_ADDR)
1645 continue;
1646
1647 if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr,
1648 DATA_GENERIC_ENHANCE)) {
1649 ret = -EFSCORRUPTED;
1650 break;
1651 }
1652
1653 f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1);
1654 f2fs_set_data_blkaddr(dn, NEW_ADDR);
1655 }
1656
1657 f2fs_update_read_extent_cache_range(dn, start, 0, index - start);
1658 f2fs_update_age_extent_cache_range(dn, start, index - start);
1659
1660 return ret;
1661 }
1662
1663 static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
1664 int mode)
1665 {
1666 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1667 struct address_space *mapping = inode->i_mapping;
1668 pgoff_t index, pg_start, pg_end;
1669 loff_t new_size = i_size_read(inode);
1670 loff_t off_start, off_end;
1671 int ret = 0;
1672
1673 ret = inode_newsize_ok(inode, (len + offset));
1674 if (ret)
1675 return ret;
1676
1677 ret = f2fs_convert_inline_inode(inode);
1678 if (ret)
1679 return ret;
1680
1681 ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
1682 if (ret)
1683 return ret;
1684
1685 f2fs_zero_post_eof_page(inode, offset + len, true);
1686
1687 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
1688 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
1689
1690 off_start = offset & (PAGE_SIZE - 1);
1691 off_end = (offset + len) & (PAGE_SIZE - 1);
1692
1693 if (pg_start == pg_end) {
1694 ret = fill_zero(inode, pg_start, off_start,
1695 off_end - off_start);
1696 if (ret)
1697 return ret;
1698
1699 new_size = max_t(loff_t, new_size, offset + len);
1700 } else {
1701 if (off_start) {
1702 ret = fill_zero(inode, pg_start++, off_start,
1703 PAGE_SIZE - off_start);
1704 if (ret)
1705 return ret;
1706
1707 new_size = max_t(loff_t, new_size,
1708 (loff_t)pg_start << PAGE_SHIFT);
1709 }
1710
1711 for (index = pg_start; index < pg_end;) {
1712 struct dnode_of_data dn;
1713 unsigned int end_offset;
1714 pgoff_t end;
1715
1716 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1717 filemap_invalidate_lock(mapping);
1718
1719 truncate_pagecache_range(inode,
1720 (loff_t)index << PAGE_SHIFT,
1721 ((loff_t)pg_end << PAGE_SHIFT) - 1);
1722
1723 f2fs_lock_op(sbi);
1724
1725 set_new_dnode(&dn, inode, NULL, NULL, 0);
1726 ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
1727 if (ret) {
1728 f2fs_unlock_op(sbi);
1729 filemap_invalidate_unlock(mapping);
1730 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1731 goto out;
1732 }
1733
1734 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
1735 end = min(pg_end, end_offset - dn.ofs_in_node + index);
1736
1737 ret = f2fs_do_zero_range(&dn, index, end);
1738 f2fs_put_dnode(&dn);
1739
1740 f2fs_unlock_op(sbi);
1741 filemap_invalidate_unlock(mapping);
1742 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1743
1744 f2fs_balance_fs(sbi, dn.node_changed);
1745
1746 if (ret)
1747 goto out;
1748
1749 index = end;
1750 new_size = max_t(loff_t, new_size,
1751 (loff_t)index << PAGE_SHIFT);
1752 }
1753
1754 if (off_end) {
1755 ret = fill_zero(inode, pg_end, 0, off_end);
1756 if (ret)
1757 goto out;
1758
1759 new_size = max_t(loff_t, new_size, offset + len);
1760 }
1761 }
1762
1763 out:
1764 if (new_size > i_size_read(inode)) {
1765 if (mode & FALLOC_FL_KEEP_SIZE)
1766 file_set_keep_isize(inode);
1767 else
1768 f2fs_i_size_write(inode, new_size);
1769 }
1770 return ret;
1771 }
1772
1773 static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
1774 {
1775 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1776 struct address_space *mapping = inode->i_mapping;
1777 pgoff_t nr, pg_start, pg_end, delta, idx;
1778 loff_t new_size;
1779 int ret = 0;
1780
1781 new_size = i_size_read(inode) + len;
1782 ret = inode_newsize_ok(inode, new_size);
1783 if (ret)
1784 return ret;
1785
1786 if (offset >= i_size_read(inode))
1787 return -EINVAL;
1788
1789 /* insert range should be aligned to block size of f2fs. */
1790 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
1791 return -EINVAL;
1792
1793 ret = f2fs_convert_inline_inode(inode);
1794 if (ret)
1795 return ret;
1796
1797 f2fs_balance_fs(sbi, true);
1798
1799 filemap_invalidate_lock(mapping);
1800 ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
1801 filemap_invalidate_unlock(mapping);
1802 if (ret)
1803 return ret;
1804
1805 /* write out all dirty pages from offset */
1806 ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
1807 if (ret)
1808 return ret;
1809
1810 pg_start = offset >> PAGE_SHIFT;
1811 pg_end = (offset + len) >> PAGE_SHIFT;
1812 delta = pg_end - pg_start;
1813 idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
1814
1815 /* avoid gc operation during block exchange */
1816 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1817 filemap_invalidate_lock(mapping);
1818
1819 f2fs_zero_post_eof_page(inode, offset + len, false);
1820 truncate_pagecache(inode, offset);
1821
1822 while (!ret && idx > pg_start) {
1823 nr = idx - pg_start;
1824 if (nr > delta)
1825 nr = delta;
1826 idx -= nr;
1827
1828 f2fs_lock_op(sbi);
1829 f2fs_drop_extent_tree(inode);
1830
1831 ret = __exchange_data_block(inode, inode, idx,
1832 idx + delta, nr, false);
1833 f2fs_unlock_op(sbi);
1834 }
1835 filemap_invalidate_unlock(mapping);
1836 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1837 if (ret)
1838 return ret;
1839
1840 /* write out all moved pages, if possible */
1841 filemap_invalidate_lock(mapping);
1842 ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
1843 truncate_pagecache(inode, offset);
1844 filemap_invalidate_unlock(mapping);
1845
1846 if (!ret)
1847 f2fs_i_size_write(inode, new_size);
1848 return ret;
1849 }
1850
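/*
 * fallocate() expansion path: preallocate blocks for [offset, offset + len),
 * using section-aligned pinned allocation for pinned files.
 */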
1851 static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
1852 loff_t len, int mode)
1853 {
1854 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1855 struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
1856 .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
1857 .m_may_create = true };
1858 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
1859 .init_gc_type = FG_GC,
1860 .should_migrate_blocks = false,
1861 .err_gc_skipped = true,
1862 .nr_free_secs = 0 };
1863 pgoff_t pg_start, pg_end;
1864 loff_t new_size;
1865 loff_t off_end;
1866 block_t expanded = 0;
1867 int err;
1868
1869 err = inode_newsize_ok(inode, (len + offset));
1870 if (err)
1871 return err;
1872
1873 err = f2fs_convert_inline_inode(inode);
1874 if (err)
1875 return err;
1876
1877 f2fs_zero_post_eof_page(inode, offset + len, true);
1878
1879 f2fs_balance_fs(sbi, true);
1880
1881 pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
1882 pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
1883 off_end = (offset + len) & (PAGE_SIZE - 1);
1884
1885 map.m_lblk = pg_start;
1886 map.m_len = pg_end - pg_start;
1887 if (off_end)
1888 map.m_len++;
1889
1890 if (!map.m_len)
1891 return 0;
1892
1893 if (f2fs_is_pinned_file(inode)) {
1894 block_t sec_blks = CAP_BLKS_PER_SEC(sbi);
1895 block_t sec_len = roundup(map.m_len, sec_blks);
1896
1897 map.m_len = sec_blks;
1898 next_alloc:
1899 f2fs_down_write(&sbi->pin_sem);
1900
1901 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
1902 if (has_not_enough_free_secs(sbi, 0, 0)) {
1903 f2fs_up_write(&sbi->pin_sem);
1904 err = -ENOSPC;
1905 f2fs_warn_ratelimited(sbi,
1906 "ino:%lu, start:%lu, end:%lu, need to trigger GC to "
1907 "reclaim enough free segment when checkpoint is enabled",
1908 inode->i_ino, pg_start, pg_end);
1909 goto out_err;
1910 }
1911 }
1912
1913 if (has_not_enough_free_secs(sbi, 0,
1914 sbi->reserved_pin_section)) {
1915 f2fs_down_write(&sbi->gc_lock);
1916 stat_inc_gc_call_count(sbi, FOREGROUND);
1917 err = f2fs_gc(sbi, &gc_control);
1918 if (err && err != -ENODATA) {
1919 f2fs_up_write(&sbi->pin_sem);
1920 goto out_err;
1921 }
1922 }
1923
1924 err = f2fs_allocate_pinning_section(sbi);
1925 if (err) {
1926 f2fs_up_write(&sbi->pin_sem);
1927 goto out_err;
1928 }
1929
1930 map.m_seg_type = CURSEG_COLD_DATA_PINNED;
1931 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
1932 file_dont_truncate(inode);
1933
1934 f2fs_up_write(&sbi->pin_sem);
1935
1936 expanded += map.m_len;
1937 sec_len -= map.m_len;
1938 map.m_lblk += map.m_len;
1939 if (!err && sec_len)
1940 goto next_alloc;
1941
1942 map.m_len = expanded;
1943 } else {
1944 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO);
1945 expanded = map.m_len;
1946 }
1947 out_err:
1948 if (err) {
1949 pgoff_t last_off;
1950
1951 if (!expanded)
1952 return err;
1953
1954 last_off = pg_start + expanded - 1;
1955
1956 /* update new size to the failed position */
1957 new_size = (last_off == pg_end) ? offset + len :
1958 (loff_t)(last_off + 1) << PAGE_SHIFT;
1959 } else {
1960 new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
1961 }
1962
1963 if (new_size > i_size_read(inode)) {
1964 if (mode & FALLOC_FL_KEEP_SIZE)
1965 file_set_keep_isize(inode);
1966 else
1967 f2fs_i_size_write(inode, new_size);
1968 }
1969
1970 return err;
1971 }
1972
1973 static long f2fs_fallocate(struct file *file, int mode,
1974 loff_t offset, loff_t len)
1975 {
1976 struct inode *inode = file_inode(file);
1977 long ret = 0;
1978
1979 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
1980 return -EIO;
1981 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
1982 return -ENOSPC;
1983 if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode))
1984 return -EOPNOTSUPP;
1985
1986 /* f2fs only supports ->fallocate for regular files */
1987 if (!S_ISREG(inode->i_mode))
1988 return -EINVAL;
1989
1990 if (IS_ENCRYPTED(inode) &&
1991 (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
1992 return -EOPNOTSUPP;
1993
1994 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
1995 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
1996 FALLOC_FL_INSERT_RANGE))
1997 return -EOPNOTSUPP;
1998
1999 inode_lock(inode);
2000
2001 /*
2002 * Pinned files should not support partial truncation since their
2003 * blocks can be in use by applications.
2004 */
2005 if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) &&
2006 (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
2007 FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) {
2008 ret = -EOPNOTSUPP;
2009 goto out;
2010 }
2011
2012 ret = file_modified(file);
2013 if (ret)
2014 goto out;
2015
2016 /*
2017 * wait for inflight dio, blocks should be removed after IO
2018 * completion.
2019 */
2020 inode_dio_wait(inode);
2021
2022 if (mode & FALLOC_FL_PUNCH_HOLE) {
2023 if (offset >= inode->i_size)
2024 goto out;
2025
2026 ret = f2fs_punch_hole(inode, offset, len);
2027 } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
2028 ret = f2fs_collapse_range(inode, offset, len);
2029 } else if (mode & FALLOC_FL_ZERO_RANGE) {
2030 ret = f2fs_zero_range(inode, offset, len, mode);
2031 } else if (mode & FALLOC_FL_INSERT_RANGE) {
2032 ret = f2fs_insert_range(inode, offset, len);
2033 } else {
2034 ret = f2fs_expand_inode_data(inode, offset, len, mode);
2035 }
2036
2037 if (!ret) {
2038 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
2039 f2fs_mark_inode_dirty_sync(inode, false);
2040 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2041 }
2042
2043 out:
2044 inode_unlock(inode);
2045
2046 trace_f2fs_fallocate(inode, mode, offset, len, ret);
2047 return ret;
2048 }
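
/*
 * Illustrative userspace sketch of the fallocate() modes dispatched above;
 * the path and sizes are placeholders and error handling is omitted.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <linux/falloc.h>
 *
 *	int fd = open("/mnt/f2fs/file", O_RDWR);
 *
 *	// preallocate 1MiB without changing i_size
 *	fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20);
 *	// punch a 4KiB hole (must be combined with KEEP_SIZE)
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 4096);
 *	// zero a block-aligned range in place
 *	fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, 4096);
 */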
2049
2050 static int f2fs_release_file(struct inode *inode, struct file *filp)
2051 {
2052 if (atomic_dec_and_test(&F2FS_I(inode)->open_count))
2053 f2fs_remove_donate_inode(inode);
2054
2055 /*
2056 * f2fs_release_file() is called on every close. So we should not
2057 * drop any in-memory pages due to a close issued by another process.
2058 */
2059 if (!(filp->f_mode & FMODE_WRITE) ||
2060 atomic_read(&inode->i_writecount) != 1)
2061 return 0;
2062
2063 inode_lock(inode);
2064 f2fs_abort_atomic_write(inode, true);
2065 inode_unlock(inode);
2066
2067 return 0;
2068 }
2069
2070 static int f2fs_file_flush(struct file *file, fl_owner_t id)
2071 {
2072 struct inode *inode = file_inode(file);
2073
2074 /*
2075 * If the process doing a transaction crashes, we should roll back.
2076 * Otherwise, other readers/writers can see a corrupted database until
2077 * all the writers close the file. Since this should be done before
2078 * dropping the file lock, it needs to be done in ->flush.
2079 */
2080 if (F2FS_I(inode)->atomic_write_task == current &&
2081 (current->flags & PF_EXITING)) {
2082 inode_lock(inode);
2083 f2fs_abort_atomic_write(inode, true);
2084 inode_unlock(inode);
2085 }
2086
2087 return 0;
2088 }
2089
2090 static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
2091 {
2092 struct f2fs_inode_info *fi = F2FS_I(inode);
2093 u32 masked_flags = fi->i_flags & mask;
2094
2095 /* mask can be shrunk by flags_valid selector */
2096 iflags &= mask;
2097
2098 /* Is it quota file? Do not allow user to mess with it */
2099 if (IS_NOQUOTA(inode))
2100 return -EPERM;
2101
2102 if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) {
2103 if (!f2fs_sb_has_casefold(F2FS_I_SB(inode)))
2104 return -EOPNOTSUPP;
2105 if (!f2fs_empty_dir(inode))
2106 return -ENOTEMPTY;
2107 }
2108
2109 if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) {
2110 if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
2111 return -EOPNOTSUPP;
2112 if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL))
2113 return -EINVAL;
2114 }
2115
2116 if ((iflags ^ masked_flags) & F2FS_COMPR_FL) {
2117 if (masked_flags & F2FS_COMPR_FL) {
2118 if (!f2fs_disable_compressed_file(inode))
2119 return -EINVAL;
2120 } else {
2121 /* try to convert inline_data to support compression */
2122 int err = f2fs_convert_inline_inode(inode);
2123 if (err)
2124 return err;
2125
2126 f2fs_down_write(&fi->i_sem);
2127 if (!f2fs_may_compress(inode) ||
2128 (S_ISREG(inode->i_mode) &&
2129 F2FS_HAS_BLOCKS(inode))) {
2130 f2fs_up_write(&fi->i_sem);
2131 return -EINVAL;
2132 }
2133 err = set_compress_context(inode);
2134 f2fs_up_write(&fi->i_sem);
2135
2136 if (err)
2137 return err;
2138 }
2139 }
2140
2141 fi->i_flags = iflags | (fi->i_flags & ~mask);
2142 f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) &&
2143 (fi->i_flags & F2FS_NOCOMP_FL));
2144
2145 if (fi->i_flags & F2FS_PROJINHERIT_FL)
2146 set_inode_flag(inode, FI_PROJ_INHERIT);
2147 else
2148 clear_inode_flag(inode, FI_PROJ_INHERIT);
2149
2150 inode_set_ctime_current(inode);
2151 f2fs_set_inode_flags(inode);
2152 f2fs_mark_inode_dirty_sync(inode, true);
2153 return 0;
2154 }
2155
2156 /* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */
2157
2158 /*
2159 * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry
2160 * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to
2161 * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add
2162 * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL.
2163 *
2164 * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and
2165 * FS_IOC_FSSETXATTR is done by the VFS.
2166 */
2167
2168 static const struct {
2169 u32 iflag;
2170 u32 fsflag;
2171 } f2fs_fsflags_map[] = {
2172 { F2FS_COMPR_FL, FS_COMPR_FL },
2173 { F2FS_SYNC_FL, FS_SYNC_FL },
2174 { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL },
2175 { F2FS_APPEND_FL, FS_APPEND_FL },
2176 { F2FS_NODUMP_FL, FS_NODUMP_FL },
2177 { F2FS_NOATIME_FL, FS_NOATIME_FL },
2178 { F2FS_NOCOMP_FL, FS_NOCOMP_FL },
2179 { F2FS_INDEX_FL, FS_INDEX_FL },
2180 { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL },
2181 { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL },
2182 { F2FS_CASEFOLD_FL, FS_CASEFOLD_FL },
2183 };
2184
2185 #define F2FS_GETTABLE_FS_FL ( \
2186 FS_COMPR_FL | \
2187 FS_SYNC_FL | \
2188 FS_IMMUTABLE_FL | \
2189 FS_APPEND_FL | \
2190 FS_NODUMP_FL | \
2191 FS_NOATIME_FL | \
2192 FS_NOCOMP_FL | \
2193 FS_INDEX_FL | \
2194 FS_DIRSYNC_FL | \
2195 FS_PROJINHERIT_FL | \
2196 FS_ENCRYPT_FL | \
2197 FS_INLINE_DATA_FL | \
2198 FS_NOCOW_FL | \
2199 FS_VERITY_FL | \
2200 FS_CASEFOLD_FL)
2201
2202 #define F2FS_SETTABLE_FS_FL ( \
2203 FS_COMPR_FL | \
2204 FS_SYNC_FL | \
2205 FS_IMMUTABLE_FL | \
2206 FS_APPEND_FL | \
2207 FS_NODUMP_FL | \
2208 FS_NOATIME_FL | \
2209 FS_NOCOMP_FL | \
2210 FS_DIRSYNC_FL | \
2211 FS_PROJINHERIT_FL | \
2212 FS_CASEFOLD_FL)
2213
2214 /* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */
2215 static inline u32 f2fs_iflags_to_fsflags(u32 iflags)
2216 {
2217 u32 fsflags = 0;
2218 int i;
2219
2220 for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
2221 if (iflags & f2fs_fsflags_map[i].iflag)
2222 fsflags |= f2fs_fsflags_map[i].fsflag;
2223
2224 return fsflags;
2225 }
2226
2227 /* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */
2228 static inline u32 f2fs_fsflags_to_iflags(u32 fsflags)
2229 {
2230 u32 iflags = 0;
2231 int i;
2232
2233 for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
2234 if (fsflags & f2fs_fsflags_map[i].fsflag)
2235 iflags |= f2fs_fsflags_map[i].iflag;
2236
2237 return iflags;
2238 }
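
/*
 * Illustrative userspace sketch of the FS_IOC_GETFLAGS/SETFLAGS interface
 * whose flag bits are translated by the helpers above; the path is a
 * placeholder and error handling is omitted.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/fs.h>
 *
 *	int fd = open("/mnt/f2fs/file", O_RDONLY);
 *	int flags;
 *
 *	ioctl(fd, FS_IOC_GETFLAGS, &flags);
 *	flags |= FS_NOCOMP_FL;		// maps to F2FS_NOCOMP_FL on disk
 *	ioctl(fd, FS_IOC_SETFLAGS, &flags);
 */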
2239
2240 static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
2241 {
2242 struct inode *inode = file_inode(filp);
2243
2244 return put_user(inode->i_generation, (int __user *)arg);
2245 }
2246
2247 static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
2248 {
2249 struct inode *inode = file_inode(filp);
2250 struct mnt_idmap *idmap = file_mnt_idmap(filp);
2251 struct f2fs_inode_info *fi = F2FS_I(inode);
2252 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2253 loff_t isize;
2254 int ret;
2255
2256 if (!(filp->f_mode & FMODE_WRITE))
2257 return -EBADF;
2258
2259 if (!inode_owner_or_capable(idmap, inode))
2260 return -EACCES;
2261
2262 if (!S_ISREG(inode->i_mode))
2263 return -EINVAL;
2264
2265 if (filp->f_flags & O_DIRECT)
2266 return -EINVAL;
2267
2268 ret = mnt_want_write_file(filp);
2269 if (ret)
2270 return ret;
2271
2272 inode_lock(inode);
2273
2274 if (!f2fs_disable_compressed_file(inode) ||
2275 f2fs_is_pinned_file(inode)) {
2276 ret = -EINVAL;
2277 goto out;
2278 }
2279
2280 if (f2fs_is_atomic_file(inode))
2281 goto out;
2282
2283 ret = f2fs_convert_inline_inode(inode);
2284 if (ret)
2285 goto out;
2286
2287 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
2288 f2fs_down_write(&fi->i_gc_rwsem[READ]);
2289
2290 /*
2291 * Wait for end_io so that F2FS_WB_CP_DATA is counted correctly by
2292 * f2fs_is_atomic_file().
2293 */
2294 if (get_dirty_pages(inode))
2295 f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u",
2296 inode->i_ino, get_dirty_pages(inode));
2297 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
2298 if (ret)
2299 goto out_unlock;
2300
2301 /* Check if the inode already has a COW inode */
2302 if (fi->cow_inode == NULL) {
2303 /* Create a COW inode for atomic write */
2304 struct dentry *dentry = file_dentry(filp);
2305 struct inode *dir = d_inode(dentry->d_parent);
2306
2307 ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode);
2308 if (ret)
2309 goto out_unlock;
2310
2311 set_inode_flag(fi->cow_inode, FI_COW_FILE);
2312 clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
2313
2314 /* Set the COW inode's atomic_inode to the atomic inode */
2315 F2FS_I(fi->cow_inode)->atomic_inode = inode;
2316 } else {
2317 /* Reuse the already created COW inode */
2318 f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode));
2319
2320 invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1);
2321
2322 ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true);
2323 if (ret)
2324 goto out_unlock;
2325 }
2326
2327 f2fs_write_inode(inode, NULL);
2328
2329 stat_inc_atomic_inode(inode);
2330
2331 set_inode_flag(inode, FI_ATOMIC_FILE);
2332
2333 isize = i_size_read(inode);
2334 fi->original_i_size = isize;
2335 if (truncate) {
2336 set_inode_flag(inode, FI_ATOMIC_REPLACE);
2337 truncate_inode_pages_final(inode->i_mapping);
2338 f2fs_i_size_write(inode, 0);
2339 isize = 0;
2340 }
2341 f2fs_i_size_write(fi->cow_inode, isize);
2342
2343 out_unlock:
2344 f2fs_up_write(&fi->i_gc_rwsem[READ]);
2345 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
2346 if (ret)
2347 goto out;
2348
2349 f2fs_update_time(sbi, REQ_TIME);
2350 fi->atomic_write_task = current;
2351 stat_update_max_atomic_write(inode);
2352 fi->atomic_write_cnt = 0;
2353 out:
2354 inode_unlock(inode);
2355 mnt_drop_write_file(filp);
2356 return ret;
2357 }
2358
2359 static int f2fs_ioc_commit_atomic_write(struct file *filp)
2360 {
2361 struct inode *inode = file_inode(filp);
2362 struct mnt_idmap *idmap = file_mnt_idmap(filp);
2363 int ret;
2364
2365 if (!(filp->f_mode & FMODE_WRITE))
2366 return -EBADF;
2367
2368 if (!inode_owner_or_capable(idmap, inode))
2369 return -EACCES;
2370
2371 ret = mnt_want_write_file(filp);
2372 if (ret)
2373 return ret;
2374
2375 f2fs_balance_fs(F2FS_I_SB(inode), true);
2376
2377 inode_lock(inode);
2378
2379 if (f2fs_is_atomic_file(inode)) {
2380 ret = f2fs_commit_atomic_write(inode);
2381 if (!ret)
2382 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
2383
2384 f2fs_abort_atomic_write(inode, ret);
2385 } else {
2386 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
2387 }
2388
2389 inode_unlock(inode);
2390 mnt_drop_write_file(filp);
2391 return ret;
2392 }
2393
2394 static int f2fs_ioc_abort_atomic_write(struct file *filp)
2395 {
2396 struct inode *inode = file_inode(filp);
2397 struct mnt_idmap *idmap = file_mnt_idmap(filp);
2398 int ret;
2399
2400 if (!(filp->f_mode & FMODE_WRITE))
2401 return -EBADF;
2402
2403 if (!inode_owner_or_capable(idmap, inode))
2404 return -EACCES;
2405
2406 ret = mnt_want_write_file(filp);
2407 if (ret)
2408 return ret;
2409
2410 inode_lock(inode);
2411
2412 f2fs_abort_atomic_write(inode, true);
2413
2414 inode_unlock(inode);
2415
2416 mnt_drop_write_file(filp);
2417 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2418 return ret;
2419 }
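
/*
 * Illustrative userspace sketch of the three atomic write ioctls above, as
 * used by databases such as SQLite; the path and data are placeholders and
 * error handling is omitted.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	int fd = open("/mnt/f2fs/db", O_RDWR);
 *
 *	ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE);
 *	pwrite(fd, "journal", 7, 0);		// staged via the COW inode
 *	ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE);
 *	// or ioctl(fd, F2FS_IOC_ABORT_ATOMIC_WRITE) to drop the staged data
 */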
2420
2421 int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag,
2422 bool readonly, bool need_lock)
2423 {
2424 struct super_block *sb = sbi->sb;
2425 int ret = 0;
2426
2427 switch (flag) {
2428 case F2FS_GOING_DOWN_FULLSYNC:
2429 ret = bdev_freeze(sb->s_bdev);
2430 if (ret)
2431 goto out;
2432 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
2433 bdev_thaw(sb->s_bdev);
2434 break;
2435 case F2FS_GOING_DOWN_METASYNC:
2436 /* do checkpoint only */
2437 ret = f2fs_sync_fs(sb, 1);
2438 if (ret) {
2439 if (ret == -EIO)
2440 ret = 0;
2441 goto out;
2442 }
2443 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
2444 break;
2445 case F2FS_GOING_DOWN_NOSYNC:
2446 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
2447 break;
2448 case F2FS_GOING_DOWN_METAFLUSH:
2449 f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
2450 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
2451 break;
2452 case F2FS_GOING_DOWN_NEED_FSCK:
2453 set_sbi_flag(sbi, SBI_NEED_FSCK);
2454 set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
2455 set_sbi_flag(sbi, SBI_IS_DIRTY);
2456 /* do checkpoint only */
2457 ret = f2fs_sync_fs(sb, 1);
2458 if (ret == -EIO)
2459 ret = 0;
2460 goto out;
2461 default:
2462 ret = -EINVAL;
2463 goto out;
2464 }
2465
2466 if (readonly)
2467 goto out;
2468
2469 /*
2470 * grab sb->s_umount to avoid racing w/ remount() and other shutdown
2471 * paths.
2472 */
2473 if (need_lock)
2474 down_write(&sbi->sb->s_umount);
2475
2476 f2fs_stop_gc_thread(sbi);
2477 f2fs_stop_discard_thread(sbi);
2478
2479 f2fs_drop_discard_cmd(sbi);
2480 clear_opt(sbi, DISCARD);
2481
2482 if (need_lock)
2483 up_write(&sbi->sb->s_umount);
2484
2485 f2fs_update_time(sbi, REQ_TIME);
2486 out:
2487
2488 trace_f2fs_shutdown(sbi, flag, ret);
2489
2490 return ret;
2491 }
2492
2493 static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
2494 {
2495 struct inode *inode = file_inode(filp);
2496 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2497 __u32 in;
2498 int ret;
2499 bool need_drop = false, readonly = false;
2500
2501 if (!capable(CAP_SYS_ADMIN))
2502 return -EPERM;
2503
2504 if (get_user(in, (__u32 __user *)arg))
2505 return -EFAULT;
2506
2507 if (in != F2FS_GOING_DOWN_FULLSYNC) {
2508 ret = mnt_want_write_file(filp);
2509 if (ret) {
2510 if (ret != -EROFS)
2511 return ret;
2512
2513 /* fallback to nosync shutdown for readonly fs */
2514 in = F2FS_GOING_DOWN_NOSYNC;
2515 readonly = true;
2516 } else {
2517 need_drop = true;
2518 }
2519 }
2520
2521 ret = f2fs_do_shutdown(sbi, in, readonly, true);
2522
2523 if (need_drop)
2524 mnt_drop_write_file(filp);
2525
2526 return ret;
2527 }
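
/*
 * Illustrative userspace sketch of F2FS_IOC_SHUTDOWN handled above; the
 * mount point is a placeholder, CAP_SYS_ADMIN is required, and error
 * handling is omitted.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	int fd = open("/mnt/f2fs", O_RDONLY);
 *	__u32 how = F2FS_GOING_DOWN_METASYNC;	// checkpoint, then stop
 *
 *	ioctl(fd, F2FS_IOC_SHUTDOWN, &how);
 */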
2528
2529 static int f2fs_keep_noreuse_range(struct inode *inode,
2530 loff_t offset, loff_t len)
2531 {
2532 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2533 u64 max_bytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
2534 u64 start, end;
2535 int ret = 0;
2536
2537 if (!S_ISREG(inode->i_mode))
2538 return 0;
2539
2540 if (offset >= max_bytes || len > max_bytes ||
2541 (offset + len) > max_bytes)
2542 return 0;
2543
2544 start = offset >> PAGE_SHIFT;
2545 end = DIV_ROUND_UP(offset + len, PAGE_SIZE);
2546
2547 inode_lock(inode);
2548 if (f2fs_is_atomic_file(inode)) {
2549 inode_unlock(inode);
2550 return 0;
2551 }
2552
2553 spin_lock(&sbi->inode_lock[DONATE_INODE]);
2554 /* let's remove the range if len == 0 */
2555 if (!len) {
2556 if (!list_empty(&F2FS_I(inode)->gdonate_list)) {
2557 list_del_init(&F2FS_I(inode)->gdonate_list);
2558 sbi->donate_files--;
2559 if (is_inode_flag_set(inode, FI_DONATE_FINISHED))
2560 ret = -EALREADY;
2561 else
2562 set_inode_flag(inode, FI_DONATE_FINISHED);
2563 } else
2564 ret = -ENOENT;
2565 } else {
2566 if (list_empty(&F2FS_I(inode)->gdonate_list)) {
2567 list_add_tail(&F2FS_I(inode)->gdonate_list,
2568 &sbi->inode_list[DONATE_INODE]);
2569 sbi->donate_files++;
2570 } else {
2571 list_move_tail(&F2FS_I(inode)->gdonate_list,
2572 &sbi->inode_list[DONATE_INODE]);
2573 }
2574 F2FS_I(inode)->donate_start = start;
2575 F2FS_I(inode)->donate_end = end - 1;
2576 clear_inode_flag(inode, FI_DONATE_FINISHED);
2577 }
2578 spin_unlock(&sbi->inode_lock[DONATE_INODE]);
2579 inode_unlock(inode);
2580
2581 return ret;
2582 }
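
/*
 * Illustrative userspace sketch, assuming f2fs_keep_noreuse_range() is
 * reached via posix_fadvise(POSIX_FADV_NOREUSE) as in recent f2fs and that
 * the raw length is passed through; the path and length are placeholders
 * and error handling is omitted.  Calling again with a zero length removes
 * the range, as handled above.
 *
 *	#include <fcntl.h>
 *
 *	int fd = open("/mnt/f2fs/cache.bin", O_RDONLY);
 *
 *	// donate the first 1MiB of page cache for later reclaim
 *	posix_fadvise(fd, 0, 1 << 20, POSIX_FADV_NOREUSE);
 */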
2583
2584 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
2585 {
2586 struct inode *inode = file_inode(filp);
2587 struct super_block *sb = inode->i_sb;
2588 struct fstrim_range range;
2589 int ret;
2590
2591 if (!capable(CAP_SYS_ADMIN))
2592 return -EPERM;
2593
2594 if (!f2fs_hw_support_discard(F2FS_SB(sb)))
2595 return -EOPNOTSUPP;
2596
2597 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
2598 sizeof(range)))
2599 return -EFAULT;
2600
2601 ret = mnt_want_write_file(filp);
2602 if (ret)
2603 return ret;
2604
2605 range.minlen = max((unsigned int)range.minlen,
2606 bdev_discard_granularity(sb->s_bdev));
2607 ret = f2fs_trim_fs(F2FS_SB(sb), &range);
2608 mnt_drop_write_file(filp);
2609 if (ret < 0)
2610 return ret;
2611
2612 if (copy_to_user((struct fstrim_range __user *)arg, &range,
2613 sizeof(range)))
2614 return -EFAULT;
2615 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2616 return 0;
2617 }
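
/*
 * Illustrative userspace sketch of FITRIM handled above; the mount point
 * is a placeholder, CAP_SYS_ADMIN is required, and error handling is
 * omitted.
 *
 *	#include <fcntl.h>
 *	#include <limits.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/fs.h>
 *
 *	int fd = open("/mnt/f2fs", O_RDONLY);
 *	struct fstrim_range range = {
 *		.start = 0,
 *		.len = ULLONG_MAX,	// trim the whole filesystem
 *		.minlen = 0,		// raised to the discard granularity
 *	};
 *
 *	ioctl(fd, FITRIM, &range);	// range.len returns bytes trimmed
 */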
2618
2619 static bool uuid_is_nonzero(__u8 u[16])
2620 {
2621 int i;
2622
2623 for (i = 0; i < 16; i++)
2624 if (u[i])
2625 return true;
2626 return false;
2627 }
2628
2629 static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
2630 {
2631 struct inode *inode = file_inode(filp);
2632 int ret;
2633
2634 if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
2635 return -EOPNOTSUPP;
2636
2637 ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
2638 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2639 return ret;
2640 }
2641
2642 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
2643 {
2644 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2645 return -EOPNOTSUPP;
2646 return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
2647 }
2648
2649 static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
2650 {
2651 struct inode *inode = file_inode(filp);
2652 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2653 u8 encrypt_pw_salt[16];
2654 int err;
2655
2656 if (!f2fs_sb_has_encrypt(sbi))
2657 return -EOPNOTSUPP;
2658
2659 err = mnt_want_write_file(filp);
2660 if (err)
2661 return err;
2662
2663 f2fs_down_write(&sbi->sb_lock);
2664
2665 if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
2666 goto got_it;
2667
2668 /* update superblock with uuid */
2669 generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
2670
2671 err = f2fs_commit_super(sbi, false);
2672 if (err) {
2673 /* undo new data */
2674 memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
2675 goto out_err;
2676 }
2677 got_it:
2678 memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16);
2679 out_err:
2680 f2fs_up_write(&sbi->sb_lock);
2681 mnt_drop_write_file(filp);
2682
2683 if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16))
2684 err = -EFAULT;
2685
2686 return err;
2687 }
2688
2689 static int f2fs_ioc_get_encryption_policy_ex(struct file *filp,
2690 unsigned long arg)
2691 {
2692 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2693 return -EOPNOTSUPP;
2694
2695 return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg);
2696 }
2697
2698 static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg)
2699 {
2700 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2701 return -EOPNOTSUPP;
2702
2703 return fscrypt_ioctl_add_key(filp, (void __user *)arg);
2704 }
2705
2706 static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg)
2707 {
2708 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2709 return -EOPNOTSUPP;
2710
2711 return fscrypt_ioctl_remove_key(filp, (void __user *)arg);
2712 }
2713
2714 static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp,
2715 unsigned long arg)
2716 {
2717 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2718 return -EOPNOTSUPP;
2719
2720 return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg);
2721 }
2722
2723 static int f2fs_ioc_get_encryption_key_status(struct file *filp,
2724 unsigned long arg)
2725 {
2726 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2727 return -EOPNOTSUPP;
2728
2729 return fscrypt_ioctl_get_key_status(filp, (void __user *)arg);
2730 }
2731
2732 static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg)
2733 {
2734 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2735 return -EOPNOTSUPP;
2736
2737 return fscrypt_ioctl_get_nonce(filp, (void __user *)arg);
2738 }
2739
2740 static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
2741 {
2742 struct inode *inode = file_inode(filp);
2743 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2744 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
2745 .no_bg_gc = false,
2746 .should_migrate_blocks = false,
2747 .nr_free_secs = 0 };
2748 __u32 sync;
2749 int ret;
2750
2751 if (!capable(CAP_SYS_ADMIN))
2752 return -EPERM;
2753
2754 if (get_user(sync, (__u32 __user *)arg))
2755 return -EFAULT;
2756
2757 if (f2fs_readonly(sbi->sb))
2758 return -EROFS;
2759
2760 ret = mnt_want_write_file(filp);
2761 if (ret)
2762 return ret;
2763
2764 if (!sync) {
2765 if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
2766 ret = -EBUSY;
2767 goto out;
2768 }
2769 } else {
2770 f2fs_down_write(&sbi->gc_lock);
2771 }
2772
2773 gc_control.init_gc_type = sync ? FG_GC : BG_GC;
2774 gc_control.err_gc_skipped = sync;
2775 stat_inc_gc_call_count(sbi, FOREGROUND);
2776 ret = f2fs_gc(sbi, &gc_control);
2777 out:
2778 mnt_drop_write_file(filp);
2779 return ret;
2780 }
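
/*
 * Illustrative userspace sketch of F2FS_IOC_GARBAGE_COLLECT handled above;
 * the mount point is a placeholder, CAP_SYS_ADMIN is required, and error
 * handling is omitted.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	int fd = open("/mnt/f2fs", O_RDONLY);
 *	__u32 sync = 1;		// 1: foreground GC, 0: background GC
 *
 *	ioctl(fd, F2FS_IOC_GARBAGE_COLLECT, &sync);
 */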
2781
2782 static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
2783 {
2784 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
2785 struct f2fs_gc_control gc_control = {
2786 .init_gc_type = range->sync ? FG_GC : BG_GC,
2787 .no_bg_gc = false,
2788 .should_migrate_blocks = false,
2789 .err_gc_skipped = range->sync,
2790 .nr_free_secs = 0 };
2791 u64 end;
2792 int ret;
2793
2794 if (!capable(CAP_SYS_ADMIN))
2795 return -EPERM;
2796 if (f2fs_readonly(sbi->sb))
2797 return -EROFS;
2798
2799 end = range->start + range->len;
2800 if (end < range->start || range->start < MAIN_BLKADDR(sbi) ||
2801 end >= MAX_BLKADDR(sbi))
2802 return -EINVAL;
2803
2804 ret = mnt_want_write_file(filp);
2805 if (ret)
2806 return ret;
2807
2808 do_more:
2809 if (!range->sync) {
2810 if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
2811 ret = -EBUSY;
2812 goto out;
2813 }
2814 } else {
2815 f2fs_down_write(&sbi->gc_lock);
2816 }
2817
2818 gc_control.victim_segno = GET_SEGNO(sbi, range->start);
2819 stat_inc_gc_call_count(sbi, FOREGROUND);
2820 ret = f2fs_gc(sbi, &gc_control);
2821 if (ret) {
2822 if (ret == -EBUSY)
2823 ret = -EAGAIN;
2824 goto out;
2825 }
2826 range->start += CAP_BLKS_PER_SEC(sbi);
2827 if (range->start <= end)
2828 goto do_more;
2829 out:
2830 mnt_drop_write_file(filp);
2831 return ret;
2832 }
2833
2834 static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
2835 {
2836 struct f2fs_gc_range range;
2837
2838 if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
2839 sizeof(range)))
2840 return -EFAULT;
2841 return __f2fs_ioc_gc_range(filp, &range);
2842 }
2843
2844 static int f2fs_ioc_write_checkpoint(struct file *filp)
2845 {
2846 struct inode *inode = file_inode(filp);
2847 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2848 int ret;
2849
2850 if (!capable(CAP_SYS_ADMIN))
2851 return -EPERM;
2852
2853 if (f2fs_readonly(sbi->sb))
2854 return -EROFS;
2855
2856 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2857 f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled.");
2858 return -EINVAL;
2859 }
2860
2861 ret = mnt_want_write_file(filp);
2862 if (ret)
2863 return ret;
2864
2865 ret = f2fs_sync_fs(sbi->sb, 1);
2866
2867 mnt_drop_write_file(filp);
2868 return ret;
2869 }
2870
2871 static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
2872 struct file *filp,
2873 struct f2fs_defragment *range)
2874 {
2875 struct inode *inode = file_inode(filp);
2876 struct f2fs_map_blocks map = { .m_next_extent = NULL,
2877 .m_seg_type = NO_CHECK_TYPE,
2878 .m_may_create = false };
2879 struct extent_info ei = {};
2880 pgoff_t pg_start, pg_end, next_pgofs;
2881 unsigned int total = 0, sec_num;
2882 block_t blk_end = 0;
2883 bool fragmented = false;
2884 int err;
2885
2886 f2fs_balance_fs(sbi, true);
2887
2888 inode_lock(inode);
2889 pg_start = range->start >> PAGE_SHIFT;
2890 pg_end = min_t(pgoff_t,
2891 (range->start + range->len) >> PAGE_SHIFT,
2892 DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE));
2893
2894 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) ||
2895 f2fs_is_atomic_file(inode)) {
2896 err = -EINVAL;
2897 goto unlock_out;
2898 }
2899
2900 /* if in-place-update policy is enabled, don't waste time here */
2901 set_inode_flag(inode, FI_OPU_WRITE);
2902 if (f2fs_should_update_inplace(inode, NULL)) {
2903 err = -EINVAL;
2904 goto out;
2905 }
2906
2907 /* writeback all dirty pages in the range */
2908 err = filemap_write_and_wait_range(inode->i_mapping,
2909 pg_start << PAGE_SHIFT,
2910 (pg_end << PAGE_SHIFT) - 1);
2911 if (err)
2912 goto out;
2913
2914 /*
2915 * look up mapping info in the extent cache, skip defragmenting if
2916 * physical block addresses are contiguous.
2917 */
2918 if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
2919 if ((pgoff_t)ei.fofs + ei.len >= pg_end)
2920 goto out;
2921 }
2922
2923 map.m_lblk = pg_start;
2924 map.m_next_pgofs = &next_pgofs;
2925
2926 /*
2927 * look up mapping info in the dnode page cache, skip defragmenting if
2928 * all physical block addresses are contiguous even if there are
2929 * hole(s) in the logical blocks.
2930 */
2931 while (map.m_lblk < pg_end) {
2932 map.m_len = pg_end - map.m_lblk;
2933 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
2934 if (err)
2935 goto out;
2936
2937 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
2938 map.m_lblk = next_pgofs;
2939 continue;
2940 }
2941
2942 if (blk_end && blk_end != map.m_pblk)
2943 fragmented = true;
2944
2945 /* record total count of blocks that we're going to move */
2946 total += map.m_len;
2947
2948 blk_end = map.m_pblk + map.m_len;
2949
2950 map.m_lblk += map.m_len;
2951 }
2952
2953 if (!fragmented) {
2954 total = 0;
2955 goto out;
2956 }
2957
2958 sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi));
2959
2960 /*
2961 * make sure there are enough free sections for LFS allocation; this
2962 * avoids running defragmentation in SSR mode when free sections are
2963 * being allocated intensively
2964 */
2965 if (has_not_enough_free_secs(sbi, 0, sec_num)) {
2966 err = -EAGAIN;
2967 goto out;
2968 }
2969
2970 map.m_lblk = pg_start;
2971 map.m_len = pg_end - pg_start;
2972 total = 0;
2973
2974 while (map.m_lblk < pg_end) {
2975 pgoff_t idx;
2976 int cnt = 0;
2977
2978 do_map:
2979 map.m_len = pg_end - map.m_lblk;
2980 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
2981 if (err)
2982 goto clear_out;
2983
2984 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
2985 map.m_lblk = next_pgofs;
2986 goto check;
2987 }
2988
2989 set_inode_flag(inode, FI_SKIP_WRITES);
2990
2991 idx = map.m_lblk;
2992 while (idx < map.m_lblk + map.m_len &&
2993 cnt < BLKS_PER_SEG(sbi)) {
2994 struct folio *folio;
2995
2996 folio = f2fs_get_lock_data_folio(inode, idx, true);
2997 if (IS_ERR(folio)) {
2998 err = PTR_ERR(folio);
2999 goto clear_out;
3000 }
3001
3002 f2fs_folio_wait_writeback(folio, DATA, true, true);
3003
3004 folio_mark_dirty(folio);
3005 folio_set_f2fs_gcing(folio);
3006 f2fs_folio_put(folio, true);
3007
3008 idx++;
3009 cnt++;
3010 total++;
3011 }
3012
3013 map.m_lblk = idx;
3014 check:
3015 if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi))
3016 goto do_map;
3017
3018 clear_inode_flag(inode, FI_SKIP_WRITES);
3019
3020 err = filemap_fdatawrite(inode->i_mapping);
3021 if (err)
3022 goto out;
3023 }
3024 clear_out:
3025 clear_inode_flag(inode, FI_SKIP_WRITES);
3026 out:
3027 clear_inode_flag(inode, FI_OPU_WRITE);
3028 unlock_out:
3029 inode_unlock(inode);
3030 if (!err)
3031 range->len = (u64)total << PAGE_SHIFT;
3032 return err;
3033 }
3034
3035 static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
3036 {
3037 struct inode *inode = file_inode(filp);
3038 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3039 struct f2fs_defragment range;
3040 int err;
3041
3042 if (!capable(CAP_SYS_ADMIN))
3043 return -EPERM;
3044
3045 if (!S_ISREG(inode->i_mode))
3046 return -EINVAL;
3047
3048 if (f2fs_readonly(sbi->sb))
3049 return -EROFS;
3050
3051 if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
3052 sizeof(range)))
3053 return -EFAULT;
3054
3055 /* verify alignment of offset & size */
3056 if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1))
3057 return -EINVAL;
3058
3059 if (unlikely((range.start + range.len) >> PAGE_SHIFT >
3060 max_file_blocks(inode)))
3061 return -EINVAL;
3062
3063 err = mnt_want_write_file(filp);
3064 if (err)
3065 return err;
3066
3067 err = f2fs_defragment_range(sbi, filp, &range);
3068 mnt_drop_write_file(filp);
3069
3070 if (range.len)
3071 f2fs_update_time(sbi, REQ_TIME);
3072 if (err < 0)
3073 return err;
3074
3075 if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
3076 sizeof(range)))
3077 return -EFAULT;
3078
3079 return 0;
3080 }
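
/*
 * Illustrative userspace sketch of F2FS_IOC_DEFRAGMENT handled above; the
 * path and range are placeholders, CAP_SYS_ADMIN is required, and error
 * handling is omitted.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	int fd = open("/mnt/f2fs/bigfile", O_RDWR);
 *	struct f2fs_defragment df = {
 *		.start = 0,		// block-size aligned offset
 *		.len = 64 << 20,	// block-size aligned length
 *	};
 *
 *	ioctl(fd, F2FS_IOC_DEFRAGMENT, &df);	// df.len returns bytes queued
 */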
3081
3082 static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
3083 struct file *file_out, loff_t pos_out, size_t len)
3084 {
3085 struct inode *src = file_inode(file_in);
3086 struct inode *dst = file_inode(file_out);
3087 struct f2fs_sb_info *sbi = F2FS_I_SB(src);
3088 size_t olen = len, dst_max_i_size = 0;
3089 size_t dst_osize;
3090 int ret;
3091
3092 if (file_in->f_path.mnt != file_out->f_path.mnt ||
3093 src->i_sb != dst->i_sb)
3094 return -EXDEV;
3095
3096 if (unlikely(f2fs_readonly(src->i_sb)))
3097 return -EROFS;
3098
3099 if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
3100 return -EINVAL;
3101
3102 if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst))
3103 return -EOPNOTSUPP;
3104
3105 if (pos_out < 0 || pos_in < 0)
3106 return -EINVAL;
3107
3108 if (src == dst) {
3109 if (pos_in == pos_out)
3110 return 0;
3111 if (pos_out > pos_in && pos_out < pos_in + len)
3112 return -EINVAL;
3113 }
3114
3115 inode_lock(src);
3116 if (src != dst) {
3117 ret = -EBUSY;
3118 if (!inode_trylock(dst))
3119 goto out;
3120 }
3121
3122 if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) ||
3123 f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) {
3124 ret = -EOPNOTSUPP;
3125 goto out_unlock;
3126 }
3127
3128 if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) {
3129 ret = -EINVAL;
3130 goto out_unlock;
3131 }
3132
3133 ret = -EINVAL;
3134 if (pos_in + len > src->i_size || pos_in + len < pos_in)
3135 goto out_unlock;
3136 if (len == 0)
3137 olen = len = src->i_size - pos_in;
3138 if (pos_in + len == src->i_size)
3139 len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in;
3140 if (len == 0) {
3141 ret = 0;
3142 goto out_unlock;
3143 }
3144
3145 dst_osize = dst->i_size;
3146 if (pos_out + olen > dst->i_size)
3147 dst_max_i_size = pos_out + olen;
3148
3149 /* verify the end result is block aligned */
3150 if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) ||
3151 !IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) ||
3152 !IS_ALIGNED(pos_out, F2FS_BLKSIZE))
3153 goto out_unlock;
3154
3155 ret = f2fs_convert_inline_inode(src);
3156 if (ret)
3157 goto out_unlock;
3158
3159 ret = f2fs_convert_inline_inode(dst);
3160 if (ret)
3161 goto out_unlock;
3162
3163 /* write out all dirty pages from offset */
3164 ret = filemap_write_and_wait_range(src->i_mapping,
3165 pos_in, pos_in + len);
3166 if (ret)
3167 goto out_unlock;
3168
3169 ret = filemap_write_and_wait_range(dst->i_mapping,
3170 pos_out, pos_out + len);
3171 if (ret)
3172 goto out_unlock;
3173
3174 f2fs_balance_fs(sbi, true);
3175
3176 f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
3177 if (src != dst) {
3178 ret = -EBUSY;
3179 if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
3180 goto out_src;
3181 }
3182
3183 f2fs_lock_op(sbi);
3184 ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in),
3185 F2FS_BYTES_TO_BLK(pos_out),
3186 F2FS_BYTES_TO_BLK(len), false);
3187
3188 if (!ret) {
3189 if (dst_max_i_size)
3190 f2fs_i_size_write(dst, dst_max_i_size);
3191 else if (dst_osize != dst->i_size)
3192 f2fs_i_size_write(dst, dst_osize);
3193 }
3194 f2fs_unlock_op(sbi);
3195
3196 if (src != dst)
3197 f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
3198 out_src:
3199 f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
3200 if (ret)
3201 goto out_unlock;
3202
3203 inode_set_mtime_to_ts(src, inode_set_ctime_current(src));
3204 f2fs_mark_inode_dirty_sync(src, false);
3205 if (src != dst) {
3206 inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst));
3207 f2fs_mark_inode_dirty_sync(dst, false);
3208 }
3209 f2fs_update_time(sbi, REQ_TIME);
3210
3211 out_unlock:
3212 if (src != dst)
3213 inode_unlock(dst);
3214 out:
3215 inode_unlock(src);
3216 return ret;
3217 }
3218
3219 static int __f2fs_ioc_move_range(struct file *filp,
3220 struct f2fs_move_range *range)
3221 {
3222 int err;
3223
3224 if (!(filp->f_mode & FMODE_READ) ||
3225 !(filp->f_mode & FMODE_WRITE))
3226 return -EBADF;
3227
3228 CLASS(fd, dst)(range->dst_fd);
3229 if (fd_empty(dst))
3230 return -EBADF;
3231
3232 if (!(fd_file(dst)->f_mode & FMODE_WRITE))
3233 return -EBADF;
3234
3235 err = mnt_want_write_file(filp);
3236 if (err)
3237 return err;
3238
3239 err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst),
3240 range->pos_out, range->len);
3241
3242 mnt_drop_write_file(filp);
3243 return err;
3244 }
3245
3246 static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
3247 {
3248 struct f2fs_move_range range;
3249
3250 if (copy_from_user(&range, (struct f2fs_move_range __user *)arg,
3251 sizeof(range)))
3252 return -EFAULT;
3253 return __f2fs_ioc_move_range(filp, &range);
3254 }
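
/*
 * Illustrative userspace sketch of F2FS_IOC_MOVE_RANGE handled above; the
 * paths and length are placeholders and error handling is omitted.  The
 * source must be opened read/write and the destination writable, and the
 * offsets must be block-size aligned.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	int src = open("/mnt/f2fs/src", O_RDWR);
 *	int dst = open("/mnt/f2fs/dst", O_RDWR);
 *	struct f2fs_move_range mr = {
 *		.dst_fd = dst,
 *		.pos_in = 0,
 *		.pos_out = 0,
 *		.len = 1 << 20,
 *	};
 *
 *	ioctl(src, F2FS_IOC_MOVE_RANGE, &mr);
 */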
3255
3256 static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
3257 {
3258 struct inode *inode = file_inode(filp);
3259 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3260 struct sit_info *sm = SIT_I(sbi);
3261 unsigned int start_segno = 0, end_segno = 0;
3262 unsigned int dev_start_segno = 0, dev_end_segno = 0;
3263 struct f2fs_flush_device range;
3264 struct f2fs_gc_control gc_control = {
3265 .init_gc_type = FG_GC,
3266 .should_migrate_blocks = true,
3267 .err_gc_skipped = true,
3268 .nr_free_secs = 0 };
3269 int ret;
3270
3271 if (!capable(CAP_SYS_ADMIN))
3272 return -EPERM;
3273
3274 if (f2fs_readonly(sbi->sb))
3275 return -EROFS;
3276
3277 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
3278 return -EINVAL;
3279
3280 if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
3281 sizeof(range)))
3282 return -EFAULT;
3283
3284 if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
3285 __is_large_section(sbi)) {
3286 f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1",
3287 range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi));
3288 return -EINVAL;
3289 }
3290
3291 ret = mnt_want_write_file(filp);
3292 if (ret)
3293 return ret;
3294
3295 if (range.dev_num != 0)
3296 dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk);
3297 dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk);
3298
3299 start_segno = sm->last_victim[FLUSH_DEVICE];
3300 if (start_segno < dev_start_segno || start_segno >= dev_end_segno)
3301 start_segno = dev_start_segno;
3302 end_segno = min(start_segno + range.segments, dev_end_segno);
3303
3304 while (start_segno < end_segno) {
3305 if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
3306 ret = -EBUSY;
3307 goto out;
3308 }
3309 sm->last_victim[GC_CB] = end_segno + 1;
3310 sm->last_victim[GC_GREEDY] = end_segno + 1;
3311 sm->last_victim[ALLOC_NEXT] = end_segno + 1;
3312
3313 gc_control.victim_segno = start_segno;
3314 stat_inc_gc_call_count(sbi, FOREGROUND);
3315 ret = f2fs_gc(sbi, &gc_control);
3316 if (ret == -EAGAIN)
3317 ret = 0;
3318 else if (ret < 0)
3319 break;
3320 start_segno++;
3321 }
3322 out:
3323 mnt_drop_write_file(filp);
3324 return ret;
3325 }
3326
3327 static int f2fs_ioc_get_features(struct file *filp, unsigned long arg)
3328 {
3329 struct inode *inode = file_inode(filp);
3330 u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature);
3331
3332 /* Android's SQLite validates this feature bit, so always report atomic write support. */
3333 sb_feature |= F2FS_FEATURE_ATOMIC_WRITE;
3334
3335 return put_user(sb_feature, (u32 __user *)arg);
3336 }
3337
3338 #ifdef CONFIG_QUOTA
3339 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
3340 {
3341 struct dquot *transfer_to[MAXQUOTAS] = {};
3342 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3343 struct super_block *sb = sbi->sb;
3344 int err;
3345
3346 transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
3347 if (IS_ERR(transfer_to[PRJQUOTA]))
3348 return PTR_ERR(transfer_to[PRJQUOTA]);
3349
3350 err = __dquot_transfer(inode, transfer_to);
3351 if (err)
3352 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
3353 dqput(transfer_to[PRJQUOTA]);
3354 return err;
3355 }
3356
3357 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
3358 {
3359 struct f2fs_inode_info *fi = F2FS_I(inode);
3360 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3361 struct f2fs_inode *ri = NULL;
3362 kprojid_t kprojid;
3363 int err;
3364
3365 if (!f2fs_sb_has_project_quota(sbi)) {
3366 if (projid != F2FS_DEF_PROJID)
3367 return -EOPNOTSUPP;
3368 else
3369 return 0;
3370 }
3371
3372 if (!f2fs_has_extra_attr(inode))
3373 return -EOPNOTSUPP;
3374
3375 kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
3376
3377 if (projid_eq(kprojid, fi->i_projid))
3378 return 0;
3379
3380 err = -EPERM;
3381 /* Is it quota file? Do not allow user to mess with it */
3382 if (IS_NOQUOTA(inode))
3383 return err;
3384
3385 if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
3386 return -EOVERFLOW;
3387
3388 err = f2fs_dquot_initialize(inode);
3389 if (err)
3390 return err;
3391
3392 f2fs_lock_op(sbi);
3393 err = f2fs_transfer_project_quota(inode, kprojid);
3394 if (err)
3395 goto out_unlock;
3396
3397 fi->i_projid = kprojid;
3398 inode_set_ctime_current(inode);
3399 f2fs_mark_inode_dirty_sync(inode, true);
3400 out_unlock:
3401 f2fs_unlock_op(sbi);
3402 return err;
3403 }
3404 #else
3405 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
3406 {
3407 return 0;
3408 }
3409
3410 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
3411 {
3412 if (projid != F2FS_DEF_PROJID)
3413 return -EOPNOTSUPP;
3414 return 0;
3415 }
3416 #endif
3417
3418 int f2fs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
3419 {
3420 struct inode *inode = d_inode(dentry);
3421 struct f2fs_inode_info *fi = F2FS_I(inode);
3422 u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags);
3423
3424 if (IS_ENCRYPTED(inode))
3425 fsflags |= FS_ENCRYPT_FL;
3426 if (IS_VERITY(inode))
3427 fsflags |= FS_VERITY_FL;
3428 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
3429 fsflags |= FS_INLINE_DATA_FL;
3430 if (is_inode_flag_set(inode, FI_PIN_FILE))
3431 fsflags |= FS_NOCOW_FL;
3432
3433 fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL);
3434
3435 if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)))
3436 fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid);
3437
3438 return 0;
3439 }
3440
3441 int f2fs_fileattr_set(struct mnt_idmap *idmap,
3442 struct dentry *dentry, struct file_kattr *fa)
3443 {
3444 struct inode *inode = d_inode(dentry);
3445 u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL;
3446 u32 iflags;
3447 int err;
3448
3449 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
3450 return -EIO;
3451 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
3452 return -ENOSPC;
3453 if (fsflags & ~F2FS_GETTABLE_FS_FL)
3454 return -EOPNOTSUPP;
3455 fsflags &= F2FS_SETTABLE_FS_FL;
3456 if (!fa->flags_valid)
3457 mask &= FS_COMMON_FL;
3458
3459 iflags = f2fs_fsflags_to_iflags(fsflags);
3460 if (f2fs_mask_flags(inode->i_mode, iflags) != iflags)
3461 return -EOPNOTSUPP;
3462
3463 err = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(mask));
3464 if (!err)
3465 err = f2fs_ioc_setproject(inode, fa->fsx_projid);
3466
3467 return err;
3468 }
3469
3470 int f2fs_pin_file_control(struct inode *inode, bool inc)
3471 {
3472 struct f2fs_inode_info *fi = F2FS_I(inode);
3473 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3474
3475 if (IS_DEVICE_ALIASING(inode))
3476 return -EINVAL;
3477
3478 if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) {
3479 f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials",
3480 __func__, inode->i_ino, fi->i_gc_failures);
3481 clear_inode_flag(inode, FI_PIN_FILE);
3482 return -EAGAIN;
3483 }
3484
3485 /* Use i_gc_failures for normal file as a risk signal. */
3486 if (inc)
3487 f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1);
3488
3489 return 0;
3490 }
3491
3492 static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
3493 {
3494 struct inode *inode = file_inode(filp);
3495 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3496 __u32 pin;
3497 int ret = 0;
3498
3499 if (get_user(pin, (__u32 __user *)arg))
3500 return -EFAULT;
3501
3502 if (!S_ISREG(inode->i_mode))
3503 return -EINVAL;
3504
3505 if (f2fs_readonly(sbi->sb))
3506 return -EROFS;
3507
3508 if (!pin && IS_DEVICE_ALIASING(inode))
3509 return -EOPNOTSUPP;
3510
3511 ret = mnt_want_write_file(filp);
3512 if (ret)
3513 return ret;
3514
3515 inode_lock(inode);
3516
3517 if (f2fs_is_atomic_file(inode)) {
3518 ret = -EINVAL;
3519 goto out;
3520 }
3521
3522 if (!pin) {
3523 clear_inode_flag(inode, FI_PIN_FILE);
3524 f2fs_i_gc_failures_write(inode, 0);
3525 goto done;
3526 } else if (f2fs_is_pinned_file(inode)) {
3527 goto done;
3528 }
3529
3530 if (F2FS_HAS_BLOCKS(inode)) {
3531 ret = -EFBIG;
3532 goto out;
3533 }
3534
3535 /* Let's allow file pinning on zoned device. */
3536 if (!f2fs_sb_has_blkzoned(sbi) &&
3537 f2fs_should_update_outplace(inode, NULL)) {
3538 ret = -EINVAL;
3539 goto out;
3540 }
3541
3542 if (f2fs_pin_file_control(inode, false)) {
3543 ret = -EAGAIN;
3544 goto out;
3545 }
3546
3547 ret = f2fs_convert_inline_inode(inode);
3548 if (ret)
3549 goto out;
3550
3551 if (!f2fs_disable_compressed_file(inode)) {
3552 ret = -EOPNOTSUPP;
3553 goto out;
3554 }
3555
3556 set_inode_flag(inode, FI_PIN_FILE);
3557 ret = F2FS_I(inode)->i_gc_failures;
3558 done:
3559 f2fs_update_time(sbi, REQ_TIME);
3560 out:
3561 inode_unlock(inode);
3562 mnt_drop_write_file(filp);
3563 return ret;
3564 }
3565
3566 static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
3567 {
3568 struct inode *inode = file_inode(filp);
3569 __u32 pin = 0;
3570
3571 if (is_inode_flag_set(inode, FI_PIN_FILE))
3572 pin = F2FS_I(inode)->i_gc_failures;
3573 return put_user(pin, (u32 __user *)arg);
3574 }
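
/*
 * Illustrative userspace sketch of the pin-file ioctls above; the path and
 * size are placeholders and error handling is omitted.  Pinning only
 * succeeds on a regular file without allocated blocks, which is then
 * typically preallocated while pinned.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	int fd = open("/mnt/f2fs/swapfile", O_RDWR | O_CREAT, 0600);
 *	__u32 pin = 1, gc_failures;
 *
 *	ioctl(fd, F2FS_IOC_SET_PIN_FILE, &pin);
 *	fallocate(fd, 0, 0, 1 << 30);		// allocate pinned blocks
 *	ioctl(fd, F2FS_IOC_GET_PIN_FILE, &gc_failures);
 */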
3575
3576 static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg)
3577 {
3578 return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 1 : 0,
3579 (u32 __user *)arg);
3580 }
3581
3582 static int f2fs_ioc_io_prio(struct file *filp, unsigned long arg)
3583 {
3584 struct inode *inode = file_inode(filp);
3585 __u32 level;
3586
3587 if (get_user(level, (__u32 __user *)arg))
3588 return -EFAULT;
3589
3590 if (!S_ISREG(inode->i_mode) || level >= F2FS_IOPRIO_MAX)
3591 return -EINVAL;
3592
3593 inode_lock(inode);
3594 F2FS_I(inode)->ioprio_hint = level;
3595 inode_unlock(inode);
3596 return 0;
3597 }
3598
3599 int f2fs_precache_extents(struct inode *inode)
3600 {
3601 struct f2fs_inode_info *fi = F2FS_I(inode);
3602 struct f2fs_map_blocks map;
3603 pgoff_t m_next_extent;
3604 loff_t end;
3605 int err;
3606
3607 if (is_inode_flag_set(inode, FI_NO_EXTENT))
3608 return -EOPNOTSUPP;
3609
3610 map.m_lblk = 0;
3611 map.m_pblk = 0;
3612 map.m_next_pgofs = NULL;
3613 map.m_next_extent = &m_next_extent;
3614 map.m_seg_type = NO_CHECK_TYPE;
3615 map.m_may_create = false;
3616 end = F2FS_BLK_ALIGN(i_size_read(inode));
3617
3618 while (map.m_lblk < end) {
3619 map.m_len = end - map.m_lblk;
3620
3621 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
3622 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE);
3623 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
3624 if (err || !map.m_len)
3625 return err;
3626
3627 map.m_lblk = m_next_extent;
3628 }
3629
3630 return 0;
3631 }
3632
3633 static int f2fs_ioc_precache_extents(struct file *filp)
3634 {
3635 return f2fs_precache_extents(file_inode(filp));
3636 }
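
/*
 * Illustrative userspace sketch of F2FS_IOC_PRECACHE_EXTENTS handled
 * above; the path is a placeholder and error handling is omitted.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	int fd = open("/mnt/f2fs/file", O_RDONLY);
 *
 *	ioctl(fd, F2FS_IOC_PRECACHE_EXTENTS);	// takes no argument
 */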
3637
3638 static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
3639 {
3640 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
3641 __u64 block_count;
3642
3643 if (!capable(CAP_SYS_ADMIN))
3644 return -EPERM;
3645
3646 if (f2fs_readonly(sbi->sb))
3647 return -EROFS;
3648
3649 if (copy_from_user(&block_count, (void __user *)arg,
3650 sizeof(block_count)))
3651 return -EFAULT;
3652
3653 return f2fs_resize_fs(filp, block_count);
3654 }
3655
3656 static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
3657 {
3658 struct inode *inode = file_inode(filp);
3659
3660 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3661
3662 if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) {
3663 f2fs_warn(F2FS_I_SB(inode),
3664 "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem",
3665 inode->i_ino);
3666 return -EOPNOTSUPP;
3667 }
3668
3669 return fsverity_ioctl_enable(filp, (const void __user *)arg);
3670 }
3671
3672 static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg)
3673 {
3674 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
3675 return -EOPNOTSUPP;
3676
3677 return fsverity_ioctl_measure(filp, (void __user *)arg);
3678 }
3679
3680 static int f2fs_ioc_read_verity_metadata(struct file *filp, unsigned long arg)
3681 {
3682 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
3683 return -EOPNOTSUPP;
3684
3685 return fsverity_ioctl_read_metadata(filp, (const void __user *)arg);
3686 }
3687
3688 static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg)
3689 {
3690 struct inode *inode = file_inode(filp);
3691 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3692 char *vbuf;
3693 int count;
3694 int err = 0;
3695
3696 vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL);
3697 if (!vbuf)
3698 return -ENOMEM;
3699
3700 f2fs_down_read(&sbi->sb_lock);
3701 count = utf16s_to_utf8s(sbi->raw_super->volume_name,
3702 ARRAY_SIZE(sbi->raw_super->volume_name),
3703 UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME);
3704 f2fs_up_read(&sbi->sb_lock);
3705
3706 if (copy_to_user((char __user *)arg, vbuf,
3707 min(FSLABEL_MAX, count)))
3708 err = -EFAULT;
3709
3710 kfree(vbuf);
3711 return err;
3712 }
3713
3714 static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg)
3715 {
3716 struct inode *inode = file_inode(filp);
3717 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3718 char *vbuf;
3719 int err = 0;
3720
3721 if (!capable(CAP_SYS_ADMIN))
3722 return -EPERM;
3723
3724 vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX);
3725 if (IS_ERR(vbuf))
3726 return PTR_ERR(vbuf);
3727
3728 err = mnt_want_write_file(filp);
3729 if (err)
3730 goto out;
3731
3732 f2fs_down_write(&sbi->sb_lock);
3733
3734 memset(sbi->raw_super->volume_name, 0,
3735 sizeof(sbi->raw_super->volume_name));
3736 utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN,
3737 sbi->raw_super->volume_name,
3738 ARRAY_SIZE(sbi->raw_super->volume_name));
3739
3740 err = f2fs_commit_super(sbi, false);
3741
3742 f2fs_up_write(&sbi->sb_lock);
3743
3744 mnt_drop_write_file(filp);
3745 out:
3746 kfree(vbuf);
3747 return err;
3748 }
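
/*
 * Illustrative userspace sketch of the FS_IOC_GETFSLABEL/SETFSLABEL
 * handlers above; the mount point and label are placeholders and error
 * handling is omitted.  SETFSLABEL requires CAP_SYS_ADMIN.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/fs.h>
 *
 *	int fd = open("/mnt/f2fs", O_RDONLY);
 *	char label[FSLABEL_MAX];
 *
 *	ioctl(fd, FS_IOC_GETFSLABEL, label);
 *	ioctl(fd, FS_IOC_SETFSLABEL, "backup");
 */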
3749
3750 static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks)
3751 {
3752 if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
3753 return -EOPNOTSUPP;
3754
3755 if (!f2fs_compressed_file(inode))
3756 return -EINVAL;
3757
3758 *blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);
3759
3760 return 0;
3761 }
3762
3763 static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg)
3764 {
3765 struct inode *inode = file_inode(filp);
3766 __u64 blocks;
3767 int ret;
3768
3769 ret = f2fs_get_compress_blocks(inode, &blocks);
3770 if (ret < 0)
3771 return ret;
3772
3773 return put_user(blocks, (u64 __user *)arg);
3774 }
3775
3776 static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
3777 {
3778 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
3779 unsigned int released_blocks = 0;
3780 int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
3781 block_t blkaddr;
3782 int i;
3783
3784 for (i = 0; i < count; i++) {
3785 blkaddr = data_blkaddr(dn->inode, dn->node_folio,
3786 dn->ofs_in_node + i);
3787
3788 if (!__is_valid_data_blkaddr(blkaddr))
3789 continue;
3790 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
3791 DATA_GENERIC_ENHANCE)))
3792 return -EFSCORRUPTED;
3793 }
3794
3795 while (count) {
3796 int compr_blocks = 0;
3797
3798 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
3799 blkaddr = f2fs_data_blkaddr(dn);
3800
3801 if (i == 0) {
3802 if (blkaddr == COMPRESS_ADDR)
3803 continue;
3804 dn->ofs_in_node += cluster_size;
3805 goto next;
3806 }
3807
3808 if (__is_valid_data_blkaddr(blkaddr))
3809 compr_blocks++;
3810
3811 if (blkaddr != NEW_ADDR)
3812 continue;
3813
3814 f2fs_set_data_blkaddr(dn, NULL_ADDR);
3815 }
3816
3817 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false);
3818 dec_valid_block_count(sbi, dn->inode,
3819 cluster_size - compr_blocks);
3820
3821 released_blocks += cluster_size - compr_blocks;
3822 next:
3823 count -= cluster_size;
3824 }
3825
3826 return released_blocks;
3827 }
3828
3829 static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
3830 {
3831 struct inode *inode = file_inode(filp);
3832 struct f2fs_inode_info *fi = F2FS_I(inode);
3833 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3834 pgoff_t page_idx = 0, last_idx;
3835 unsigned int released_blocks = 0;
3836 int ret;
3837 int writecount;
3838
3839 if (!f2fs_sb_has_compression(sbi))
3840 return -EOPNOTSUPP;
3841
3842 if (f2fs_readonly(sbi->sb))
3843 return -EROFS;
3844
3845 ret = mnt_want_write_file(filp);
3846 if (ret)
3847 return ret;
3848
3849 f2fs_balance_fs(sbi, true);
3850
3851 inode_lock(inode);
3852
3853 writecount = atomic_read(&inode->i_writecount);
3854 if ((filp->f_mode & FMODE_WRITE && writecount != 1) ||
3855 (!(filp->f_mode & FMODE_WRITE) && writecount)) {
3856 ret = -EBUSY;
3857 goto out;
3858 }
3859
3860 if (!f2fs_compressed_file(inode) ||
3861 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
3862 ret = -EINVAL;
3863 goto out;
3864 }
3865
3866 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
3867 if (ret)
3868 goto out;
3869
3870 if (!atomic_read(&fi->i_compr_blocks)) {
3871 ret = -EPERM;
3872 goto out;
3873 }
3874
3875 set_inode_flag(inode, FI_COMPRESS_RELEASED);
3876 inode_set_ctime_current(inode);
3877 f2fs_mark_inode_dirty_sync(inode, true);
3878
3879 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
3880 filemap_invalidate_lock(inode->i_mapping);
3881
3882 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
3883
3884 while (page_idx < last_idx) {
3885 struct dnode_of_data dn;
3886 pgoff_t end_offset, count;
3887
3888 f2fs_lock_op(sbi);
3889
3890 set_new_dnode(&dn, inode, NULL, NULL, 0);
3891 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
3892 if (ret) {
3893 f2fs_unlock_op(sbi);
3894 if (ret == -ENOENT) {
3895 page_idx = f2fs_get_next_page_offset(&dn,
3896 page_idx);
3897 ret = 0;
3898 continue;
3899 }
3900 break;
3901 }
3902
3903 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
3904 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
3905 count = round_up(count, fi->i_cluster_size);
3906
3907 ret = release_compress_blocks(&dn, count);
3908
3909 f2fs_put_dnode(&dn);
3910
3911 f2fs_unlock_op(sbi);
3912
3913 if (ret < 0)
3914 break;
3915
3916 page_idx += count;
3917 released_blocks += ret;
3918 }
3919
3920 filemap_invalidate_unlock(inode->i_mapping);
3921 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
3922 out:
3923 if (released_blocks)
3924 f2fs_update_time(sbi, REQ_TIME);
3925 inode_unlock(inode);
3926
3927 mnt_drop_write_file(filp);
3928
3929 if (ret >= 0) {
3930 ret = put_user(released_blocks, (u64 __user *)arg);
3931 } else if (released_blocks &&
3932 atomic_read(&fi->i_compr_blocks)) {
3933 set_sbi_flag(sbi, SBI_NEED_FSCK);
3934 f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
3935 "iblocks=%llu, released=%u, compr_blocks=%u, "
3936 "run fsck to fix.",
3937 __func__, inode->i_ino, inode->i_blocks,
3938 released_blocks,
3939 atomic_read(&fi->i_compr_blocks));
3940 }
3941
3942 return ret;
3943 }
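
/*
 * Illustrative userspace sketch of the compress-blocks ioctls above; the
 * path is a placeholder and error handling is omitted.  The file must be
 * compressed, and no other writer may hold it open while its blocks are
 * being released.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	int fd = open("/mnt/f2fs/compressed", O_RDONLY);
 *	__u64 blocks, released;
 *
 *	ioctl(fd, F2FS_IOC_GET_COMPRESS_BLOCKS, &blocks);
 *	ioctl(fd, F2FS_IOC_RELEASE_COMPRESS_BLOCKS, &released);
 */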
3944
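/*
 * Re-reserve blocks for previously released compressed clusters: after the
 * block count has been charged with inc_valid_block_count(), every NULL_ADDR
 * slot in a compressed cluster is turned back into NEW_ADDR and the result is
 * accumulated in @reserved_blocks.
 */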
3945 static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count,
3946 unsigned int *reserved_blocks)
3947 {
3948 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
3949 int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
3950 block_t blkaddr;
3951 int i;
3952
3953 for (i = 0; i < count; i++) {
3954 blkaddr = data_blkaddr(dn->inode, dn->node_folio,
3955 dn->ofs_in_node + i);
3956
3957 if (!__is_valid_data_blkaddr(blkaddr))
3958 continue;
3959 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
3960 DATA_GENERIC_ENHANCE)))
3961 return -EFSCORRUPTED;
3962 }
3963
3964 while (count) {
3965 int compr_blocks = 0;
3966 blkcnt_t reserved = 0;
3967 blkcnt_t to_reserved;
3968 int ret;
3969
3970 for (i = 0; i < cluster_size; i++) {
3971 blkaddr = data_blkaddr(dn->inode, dn->node_folio,
3972 dn->ofs_in_node + i);
3973
3974 if (i == 0) {
3975 if (blkaddr != COMPRESS_ADDR) {
3976 dn->ofs_in_node += cluster_size;
3977 goto next;
3978 }
3979 continue;
3980 }
3981
3982 			/*
3983 			 * the compressed cluster may not have been released
3984 			 * because release_compress_blocks() failed, so NEW_ADDR
3985 			 * is a possible case.
3986 			 */
3987 if (blkaddr == NEW_ADDR) {
3988 reserved++;
3989 continue;
3990 }
3991 if (__is_valid_data_blkaddr(blkaddr)) {
3992 compr_blocks++;
3993 continue;
3994 }
3995 }
3996
3997 to_reserved = cluster_size - compr_blocks - reserved;
3998
3999 		/* for the case where all blocks in the cluster were already reserved */
4000 if (reserved && to_reserved == 1) {
4001 dn->ofs_in_node += cluster_size;
4002 goto next;
4003 }
4004
4005 ret = inc_valid_block_count(sbi, dn->inode,
4006 &to_reserved, false);
4007 if (unlikely(ret))
4008 return ret;
4009
4010 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
4011 if (f2fs_data_blkaddr(dn) == NULL_ADDR)
4012 f2fs_set_data_blkaddr(dn, NEW_ADDR);
4013 }
4014
4015 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
4016
4017 *reserved_blocks += to_reserved;
4018 next:
4019 count -= cluster_size;
4020 }
4021
4022 return 0;
4023 }
4024
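/*
 * F2FS_IOC_RESERVE_COMPRESS_BLOCKS: the inverse of the release ioctl above.
 * Walk the whole file, reserve blocks for the released clusters, then clear
 * FI_COMPRESS_RELEASED so the file becomes writable again.  The number of
 * reserved blocks is copied back to userspace through @arg.
 */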
4025 static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
4026 {
4027 struct inode *inode = file_inode(filp);
4028 struct f2fs_inode_info *fi = F2FS_I(inode);
4029 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4030 pgoff_t page_idx = 0, last_idx;
4031 unsigned int reserved_blocks = 0;
4032 int ret;
4033
4034 if (!f2fs_sb_has_compression(sbi))
4035 return -EOPNOTSUPP;
4036
4037 if (f2fs_readonly(sbi->sb))
4038 return -EROFS;
4039
4040 ret = mnt_want_write_file(filp);
4041 if (ret)
4042 return ret;
4043
4044 f2fs_balance_fs(sbi, true);
4045
4046 inode_lock(inode);
4047
4048 if (!f2fs_compressed_file(inode) ||
4049 !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
4050 ret = -EINVAL;
4051 goto unlock_inode;
4052 }
4053
4054 if (atomic_read(&fi->i_compr_blocks))
4055 goto unlock_inode;
4056
4057 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
4058 filemap_invalidate_lock(inode->i_mapping);
4059
4060 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
4061
4062 while (page_idx < last_idx) {
4063 struct dnode_of_data dn;
4064 pgoff_t end_offset, count;
4065
4066 f2fs_lock_op(sbi);
4067
4068 set_new_dnode(&dn, inode, NULL, NULL, 0);
4069 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
4070 if (ret) {
4071 f2fs_unlock_op(sbi);
4072 if (ret == -ENOENT) {
4073 page_idx = f2fs_get_next_page_offset(&dn,
4074 page_idx);
4075 ret = 0;
4076 continue;
4077 }
4078 break;
4079 }
4080
4081 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
4082 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
4083 count = round_up(count, fi->i_cluster_size);
4084
4085 ret = reserve_compress_blocks(&dn, count, &reserved_blocks);
4086
4087 f2fs_put_dnode(&dn);
4088
4089 f2fs_unlock_op(sbi);
4090
4091 if (ret < 0)
4092 break;
4093
4094 page_idx += count;
4095 }
4096
4097 filemap_invalidate_unlock(inode->i_mapping);
4098 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
4099
4100 if (!ret) {
4101 clear_inode_flag(inode, FI_COMPRESS_RELEASED);
4102 inode_set_ctime_current(inode);
4103 f2fs_mark_inode_dirty_sync(inode, true);
4104 }
4105 unlock_inode:
4106 if (reserved_blocks)
4107 f2fs_update_time(sbi, REQ_TIME);
4108 inode_unlock(inode);
4109 mnt_drop_write_file(filp);
4110
4111 if (!ret) {
4112 ret = put_user(reserved_blocks, (u64 __user *)arg);
4113 } else if (reserved_blocks &&
4114 atomic_read(&fi->i_compr_blocks)) {
4115 set_sbi_flag(sbi, SBI_NEED_FSCK);
4116 f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx "
4117 "iblocks=%llu, reserved=%u, compr_blocks=%u, "
4118 "run fsck to fix.",
4119 __func__, inode->i_ino, inode->i_blocks,
4120 reserved_blocks,
4121 atomic_read(&fi->i_compr_blocks));
4122 }
4123
4124 return ret;
4125 }
4126
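/*
 * Discard and/or zero out @len blocks starting at @block on @bdev, according
 * to @flags.  Secure erase is preferred over plain discard when the device
 * supports it, and encrypted inodes are zeroed through fscrypt_zeroout_range().
 */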
4127 static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
4128 pgoff_t off, block_t block, block_t len, u32 flags)
4129 {
4130 sector_t sector = SECTOR_FROM_BLOCK(block);
4131 sector_t nr_sects = SECTOR_FROM_BLOCK(len);
4132 int ret = 0;
4133
4134 if (flags & F2FS_TRIM_FILE_DISCARD) {
4135 if (bdev_max_secure_erase_sectors(bdev))
4136 ret = blkdev_issue_secure_erase(bdev, sector, nr_sects,
4137 GFP_NOFS);
4138 else
4139 ret = blkdev_issue_discard(bdev, sector, nr_sects,
4140 GFP_NOFS);
4141 }
4142
4143 if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) {
4144 if (IS_ENCRYPTED(inode))
4145 ret = fscrypt_zeroout_range(inode, off, block, len);
4146 else
4147 ret = blkdev_issue_zeroout(bdev, sector, nr_sects,
4148 GFP_NOFS, 0);
4149 }
4150
4151 return ret;
4152 }
4153
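/*
 * F2FS_IOC_SEC_TRIM_FILE: discard and/or zero out the on-disk blocks backing
 * a byte range of a regular file, described by the struct f2fs_sectrim_range
 * passed in @arg.  Runs of contiguous blocks on the same device are merged
 * before being handed to f2fs_secure_erase().
 */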
4154 static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
4155 {
4156 struct inode *inode = file_inode(filp);
4157 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4158 struct address_space *mapping = inode->i_mapping;
4159 struct block_device *prev_bdev = NULL;
4160 struct f2fs_sectrim_range range;
4161 pgoff_t index, pg_end, prev_index = 0;
4162 block_t prev_block = 0, len = 0;
4163 loff_t end_addr;
4164 bool to_end = false;
4165 int ret = 0;
4166
4167 if (!(filp->f_mode & FMODE_WRITE))
4168 return -EBADF;
4169
4170 if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg,
4171 sizeof(range)))
4172 return -EFAULT;
4173
4174 if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) ||
4175 !S_ISREG(inode->i_mode))
4176 return -EINVAL;
4177
4178 if (((range.flags & F2FS_TRIM_FILE_DISCARD) &&
4179 !f2fs_hw_support_discard(sbi)) ||
4180 ((range.flags & F2FS_TRIM_FILE_ZEROOUT) &&
4181 IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi)))
4182 return -EOPNOTSUPP;
4183
4184 ret = mnt_want_write_file(filp);
4185 if (ret)
4186 return ret;
4187 inode_lock(inode);
4188
4189 if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) ||
4190 range.start >= inode->i_size) {
4191 ret = -EINVAL;
4192 goto err;
4193 }
4194
4195 if (range.len == 0)
4196 goto err;
4197
4198 if (inode->i_size - range.start > range.len) {
4199 end_addr = range.start + range.len;
4200 } else {
4201 end_addr = range.len == (u64)-1 ?
4202 sbi->sb->s_maxbytes : inode->i_size;
4203 to_end = true;
4204 }
4205
4206 if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) ||
4207 (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) {
4208 ret = -EINVAL;
4209 goto err;
4210 }
4211
4212 index = F2FS_BYTES_TO_BLK(range.start);
4213 pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE);
4214
4215 ret = f2fs_convert_inline_inode(inode);
4216 if (ret)
4217 goto err;
4218
4219 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
4220 filemap_invalidate_lock(mapping);
4221
4222 ret = filemap_write_and_wait_range(mapping, range.start,
4223 to_end ? LLONG_MAX : end_addr - 1);
4224 if (ret)
4225 goto out;
4226
4227 truncate_inode_pages_range(mapping, range.start,
4228 to_end ? -1 : end_addr - 1);
4229
4230 while (index < pg_end) {
4231 struct dnode_of_data dn;
4232 pgoff_t end_offset, count;
4233 int i;
4234
4235 set_new_dnode(&dn, inode, NULL, NULL, 0);
4236 ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
4237 if (ret) {
4238 if (ret == -ENOENT) {
4239 index = f2fs_get_next_page_offset(&dn, index);
4240 continue;
4241 }
4242 goto out;
4243 }
4244
4245 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
4246 count = min(end_offset - dn.ofs_in_node, pg_end - index);
4247 for (i = 0; i < count; i++, index++, dn.ofs_in_node++) {
4248 struct block_device *cur_bdev;
4249 block_t blkaddr = f2fs_data_blkaddr(&dn);
4250
4251 if (!__is_valid_data_blkaddr(blkaddr))
4252 continue;
4253
4254 if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
4255 DATA_GENERIC_ENHANCE)) {
4256 ret = -EFSCORRUPTED;
4257 f2fs_put_dnode(&dn);
4258 goto out;
4259 }
4260
4261 cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
4262 if (f2fs_is_multi_device(sbi)) {
4263 int di = f2fs_target_device_index(sbi, blkaddr);
4264
4265 blkaddr -= FDEV(di).start_blk;
4266 }
4267
4268 if (len) {
4269 if (prev_bdev == cur_bdev &&
4270 index == prev_index + len &&
4271 blkaddr == prev_block + len) {
4272 len++;
4273 } else {
4274 ret = f2fs_secure_erase(prev_bdev,
4275 inode, prev_index, prev_block,
4276 len, range.flags);
4277 if (ret) {
4278 f2fs_put_dnode(&dn);
4279 goto out;
4280 }
4281
4282 len = 0;
4283 }
4284 }
4285
4286 if (!len) {
4287 prev_bdev = cur_bdev;
4288 prev_index = index;
4289 prev_block = blkaddr;
4290 len = 1;
4291 }
4292 }
4293
4294 f2fs_put_dnode(&dn);
4295
4296 if (fatal_signal_pending(current)) {
4297 ret = -EINTR;
4298 goto out;
4299 }
4300 cond_resched();
4301 }
4302
4303 if (len)
4304 ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
4305 prev_block, len, range.flags);
4306 f2fs_update_time(sbi, REQ_TIME);
4307 out:
4308 filemap_invalidate_unlock(mapping);
4309 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
4310 err:
4311 inode_unlock(inode);
4312 mnt_drop_write_file(filp);
4313
4314 return ret;
4315 }
4316
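/*
 * F2FS_IOC_GET_COMPRESS_OPTION: report the inode's compression algorithm and
 * log2 of its cluster size through a struct f2fs_comp_option.
 */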
4317 static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg)
4318 {
4319 struct inode *inode = file_inode(filp);
4320 struct f2fs_comp_option option;
4321
4322 if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
4323 return -EOPNOTSUPP;
4324
4325 inode_lock_shared(inode);
4326
4327 if (!f2fs_compressed_file(inode)) {
4328 inode_unlock_shared(inode);
4329 return -ENODATA;
4330 }
4331
4332 option.algorithm = F2FS_I(inode)->i_compress_algorithm;
4333 option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
4334
4335 inode_unlock_shared(inode);
4336
4337 if (copy_to_user((struct f2fs_comp_option __user *)arg, &option,
4338 sizeof(option)))
4339 return -EFAULT;
4340
4341 return 0;
4342 }
4343
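/*
 * F2FS_IOC_SET_COMPRESS_OPTION: change the inode's compression algorithm and
 * cluster size.  Only allowed on a compressed file that is not mmapped, has
 * no dirty pages and currently occupies no blocks.
 */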
4344 static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
4345 {
4346 struct inode *inode = file_inode(filp);
4347 struct f2fs_inode_info *fi = F2FS_I(inode);
4348 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4349 struct f2fs_comp_option option;
4350 int ret = 0;
4351
4352 if (!f2fs_sb_has_compression(sbi))
4353 return -EOPNOTSUPP;
4354
4355 if (!(filp->f_mode & FMODE_WRITE))
4356 return -EBADF;
4357
4358 if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg,
4359 sizeof(option)))
4360 return -EFAULT;
4361
4362 if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
4363 option.log_cluster_size > MAX_COMPRESS_LOG_SIZE ||
4364 option.algorithm >= COMPRESS_MAX)
4365 return -EINVAL;
4366
4367 ret = mnt_want_write_file(filp);
4368 if (ret)
4369 return ret;
4370 inode_lock(inode);
4371
4372 f2fs_down_write(&F2FS_I(inode)->i_sem);
4373 if (!f2fs_compressed_file(inode)) {
4374 ret = -EINVAL;
4375 goto out;
4376 }
4377
4378 if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) {
4379 ret = -EBUSY;
4380 goto out;
4381 }
4382
4383 if (F2FS_HAS_BLOCKS(inode)) {
4384 ret = -EFBIG;
4385 goto out;
4386 }
4387
4388 fi->i_compress_algorithm = option.algorithm;
4389 fi->i_log_cluster_size = option.log_cluster_size;
4390 fi->i_cluster_size = BIT(option.log_cluster_size);
4391 /* Set default level */
4392 if (fi->i_compress_algorithm == COMPRESS_ZSTD)
4393 fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
4394 else
4395 fi->i_compress_level = 0;
4396 /* Adjust mount option level */
4397 if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm &&
4398 F2FS_OPTION(sbi).compress_level)
4399 fi->i_compress_level = F2FS_OPTION(sbi).compress_level;
4400 f2fs_mark_inode_dirty_sync(inode, true);
4401
4402 if (!f2fs_is_compress_backend_ready(inode))
4403 f2fs_warn(sbi, "compression algorithm is successfully set, "
4404 "but current kernel doesn't support this algorithm.");
4405 out:
4406 f2fs_up_write(&fi->i_sem);
4407 inode_unlock(inode);
4408 mnt_drop_write_file(filp);
4409
4410 return ret;
4411 }
4412
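/*
 * Read @len pages starting at @page_idx into the page cache, then mark them
 * dirty and flag them as "gcing" so that the following writeback rewrites the
 * data (compressing or decompressing it, depending on the caller).
 */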
4413 static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
4414 {
4415 DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx);
4416 struct address_space *mapping = inode->i_mapping;
4417 struct folio *folio;
4418 pgoff_t redirty_idx = page_idx;
4419 int page_len = 0, ret = 0;
4420
4421 page_cache_ra_unbounded(&ractl, len, 0);
4422
4423 do {
4424 folio = read_cache_folio(mapping, page_idx, NULL, NULL);
4425 if (IS_ERR(folio)) {
4426 ret = PTR_ERR(folio);
4427 break;
4428 }
4429 page_len += folio_nr_pages(folio) - (page_idx - folio->index);
4430 page_idx = folio_next_index(folio);
4431 } while (page_len < len);
4432
4433 do {
4434 folio = filemap_lock_folio(mapping, redirty_idx);
4435
4436 		/* This should never fail, since the folio was pinned above */
4437 f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(folio));
4438
4439 f2fs_folio_wait_writeback(folio, DATA, true, true);
4440
4441 folio_mark_dirty(folio);
4442 folio_set_f2fs_gcing(folio);
4443 redirty_idx = folio_next_index(folio);
4444 folio_unlock(folio);
4445 folio_put_refs(folio, 2);
4446 } while (redirty_idx < page_idx);
4447
4448 return ret;
4449 }
4450
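/*
 * F2FS_IOC_DECOMPRESS_FILE: rewrite every compressed cluster of the file as
 * plain data by redirtying it and forcing writeback.  Only supported with
 * compress_mode=user.
 */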
4451 static int f2fs_ioc_decompress_file(struct file *filp)
4452 {
4453 struct inode *inode = file_inode(filp);
4454 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4455 struct f2fs_inode_info *fi = F2FS_I(inode);
4456 pgoff_t page_idx = 0, last_idx, cluster_idx;
4457 int ret;
4458
4459 if (!f2fs_sb_has_compression(sbi) ||
4460 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
4461 return -EOPNOTSUPP;
4462
4463 if (!(filp->f_mode & FMODE_WRITE))
4464 return -EBADF;
4465
4466 f2fs_balance_fs(sbi, true);
4467
4468 ret = mnt_want_write_file(filp);
4469 if (ret)
4470 return ret;
4471 inode_lock(inode);
4472
4473 if (!f2fs_is_compress_backend_ready(inode)) {
4474 ret = -EOPNOTSUPP;
4475 goto out;
4476 }
4477
4478 if (!f2fs_compressed_file(inode) ||
4479 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
4480 ret = -EINVAL;
4481 goto out;
4482 }
4483
4484 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
4485 if (ret)
4486 goto out;
4487
4488 if (!atomic_read(&fi->i_compr_blocks))
4489 goto out;
4490
4491 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
4492 last_idx >>= fi->i_log_cluster_size;
4493
4494 for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) {
4495 page_idx = cluster_idx << fi->i_log_cluster_size;
4496
4497 if (!f2fs_is_compressed_cluster(inode, page_idx))
4498 continue;
4499
4500 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size);
4501 if (ret < 0)
4502 break;
4503
4504 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
4505 ret = filemap_fdatawrite(inode->i_mapping);
4506 if (ret < 0)
4507 break;
4508 }
4509
4510 cond_resched();
4511 if (fatal_signal_pending(current)) {
4512 ret = -EINTR;
4513 break;
4514 }
4515 }
4516
4517 if (!ret)
4518 ret = filemap_write_and_wait_range(inode->i_mapping, 0,
4519 LLONG_MAX);
4520
4521 if (ret)
4522 f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.",
4523 __func__, ret);
4524 f2fs_update_time(sbi, REQ_TIME);
4525 out:
4526 inode_unlock(inode);
4527 mnt_drop_write_file(filp);
4528
4529 return ret;
4530 }
4531
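/*
 * F2FS_IOC_COMPRESS_FILE: counterpart of the decompress ioctl above.  Redirty
 * every non-sparse cluster with FI_ENABLE_COMPRESS set so that writeback
 * stores the data in compressed form.  Only supported with compress_mode=user.
 */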
4532 static int f2fs_ioc_compress_file(struct file *filp)
4533 {
4534 struct inode *inode = file_inode(filp);
4535 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4536 struct f2fs_inode_info *fi = F2FS_I(inode);
4537 pgoff_t page_idx = 0, last_idx, cluster_idx;
4538 int ret;
4539
4540 if (!f2fs_sb_has_compression(sbi) ||
4541 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
4542 return -EOPNOTSUPP;
4543
4544 if (!(filp->f_mode & FMODE_WRITE))
4545 return -EBADF;
4546
4547 f2fs_balance_fs(sbi, true);
4548
4549 ret = mnt_want_write_file(filp);
4550 if (ret)
4551 return ret;
4552 inode_lock(inode);
4553
4554 if (!f2fs_is_compress_backend_ready(inode)) {
4555 ret = -EOPNOTSUPP;
4556 goto out;
4557 }
4558
4559 if (!f2fs_compressed_file(inode) ||
4560 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
4561 ret = -EINVAL;
4562 goto out;
4563 }
4564
4565 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
4566 if (ret)
4567 goto out;
4568
4569 set_inode_flag(inode, FI_ENABLE_COMPRESS);
4570
4571 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
4572 last_idx >>= fi->i_log_cluster_size;
4573
4574 for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) {
4575 page_idx = cluster_idx << fi->i_log_cluster_size;
4576
4577 if (f2fs_is_sparse_cluster(inode, page_idx))
4578 continue;
4579
4580 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size);
4581 if (ret < 0)
4582 break;
4583
4584 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
4585 ret = filemap_fdatawrite(inode->i_mapping);
4586 if (ret < 0)
4587 break;
4588 }
4589
4590 cond_resched();
4591 if (fatal_signal_pending(current)) {
4592 ret = -EINTR;
4593 break;
4594 }
4595 }
4596
4597 if (!ret)
4598 ret = filemap_write_and_wait_range(inode->i_mapping, 0,
4599 LLONG_MAX);
4600
4601 clear_inode_flag(inode, FI_ENABLE_COMPRESS);
4602
4603 if (ret)
4604 f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.",
4605 __func__, ret);
4606 f2fs_update_time(sbi, REQ_TIME);
4607 out:
4608 inode_unlock(inode);
4609 mnt_drop_write_file(filp);
4610
4611 return ret;
4612 }
4613
4614 static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
4615 {
4616 switch (cmd) {
4617 case FS_IOC_GETVERSION:
4618 return f2fs_ioc_getversion(filp, arg);
4619 case F2FS_IOC_START_ATOMIC_WRITE:
4620 return f2fs_ioc_start_atomic_write(filp, false);
4621 case F2FS_IOC_START_ATOMIC_REPLACE:
4622 return f2fs_ioc_start_atomic_write(filp, true);
4623 case F2FS_IOC_COMMIT_ATOMIC_WRITE:
4624 return f2fs_ioc_commit_atomic_write(filp);
4625 case F2FS_IOC_ABORT_ATOMIC_WRITE:
4626 return f2fs_ioc_abort_atomic_write(filp);
4627 case F2FS_IOC_START_VOLATILE_WRITE:
4628 case F2FS_IOC_RELEASE_VOLATILE_WRITE:
4629 return -EOPNOTSUPP;
4630 case F2FS_IOC_SHUTDOWN:
4631 return f2fs_ioc_shutdown(filp, arg);
4632 case FITRIM:
4633 return f2fs_ioc_fitrim(filp, arg);
4634 case FS_IOC_SET_ENCRYPTION_POLICY:
4635 return f2fs_ioc_set_encryption_policy(filp, arg);
4636 case FS_IOC_GET_ENCRYPTION_POLICY:
4637 return f2fs_ioc_get_encryption_policy(filp, arg);
4638 case FS_IOC_GET_ENCRYPTION_PWSALT:
4639 return f2fs_ioc_get_encryption_pwsalt(filp, arg);
4640 case FS_IOC_GET_ENCRYPTION_POLICY_EX:
4641 return f2fs_ioc_get_encryption_policy_ex(filp, arg);
4642 case FS_IOC_ADD_ENCRYPTION_KEY:
4643 return f2fs_ioc_add_encryption_key(filp, arg);
4644 case FS_IOC_REMOVE_ENCRYPTION_KEY:
4645 return f2fs_ioc_remove_encryption_key(filp, arg);
4646 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
4647 return f2fs_ioc_remove_encryption_key_all_users(filp, arg);
4648 case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
4649 return f2fs_ioc_get_encryption_key_status(filp, arg);
4650 case FS_IOC_GET_ENCRYPTION_NONCE:
4651 return f2fs_ioc_get_encryption_nonce(filp, arg);
4652 case F2FS_IOC_GARBAGE_COLLECT:
4653 return f2fs_ioc_gc(filp, arg);
4654 case F2FS_IOC_GARBAGE_COLLECT_RANGE:
4655 return f2fs_ioc_gc_range(filp, arg);
4656 case F2FS_IOC_WRITE_CHECKPOINT:
4657 return f2fs_ioc_write_checkpoint(filp);
4658 case F2FS_IOC_DEFRAGMENT:
4659 return f2fs_ioc_defragment(filp, arg);
4660 case F2FS_IOC_MOVE_RANGE:
4661 return f2fs_ioc_move_range(filp, arg);
4662 case F2FS_IOC_FLUSH_DEVICE:
4663 return f2fs_ioc_flush_device(filp, arg);
4664 case F2FS_IOC_GET_FEATURES:
4665 return f2fs_ioc_get_features(filp, arg);
4666 case F2FS_IOC_GET_PIN_FILE:
4667 return f2fs_ioc_get_pin_file(filp, arg);
4668 case F2FS_IOC_SET_PIN_FILE:
4669 return f2fs_ioc_set_pin_file(filp, arg);
4670 case F2FS_IOC_PRECACHE_EXTENTS:
4671 return f2fs_ioc_precache_extents(filp);
4672 case F2FS_IOC_RESIZE_FS:
4673 return f2fs_ioc_resize_fs(filp, arg);
4674 case FS_IOC_ENABLE_VERITY:
4675 return f2fs_ioc_enable_verity(filp, arg);
4676 case FS_IOC_MEASURE_VERITY:
4677 return f2fs_ioc_measure_verity(filp, arg);
4678 case FS_IOC_READ_VERITY_METADATA:
4679 return f2fs_ioc_read_verity_metadata(filp, arg);
4680 case FS_IOC_GETFSLABEL:
4681 return f2fs_ioc_getfslabel(filp, arg);
4682 case FS_IOC_SETFSLABEL:
4683 return f2fs_ioc_setfslabel(filp, arg);
4684 case F2FS_IOC_GET_COMPRESS_BLOCKS:
4685 return f2fs_ioc_get_compress_blocks(filp, arg);
4686 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
4687 return f2fs_release_compress_blocks(filp, arg);
4688 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
4689 return f2fs_reserve_compress_blocks(filp, arg);
4690 case F2FS_IOC_SEC_TRIM_FILE:
4691 return f2fs_sec_trim_file(filp, arg);
4692 case F2FS_IOC_GET_COMPRESS_OPTION:
4693 return f2fs_ioc_get_compress_option(filp, arg);
4694 case F2FS_IOC_SET_COMPRESS_OPTION:
4695 return f2fs_ioc_set_compress_option(filp, arg);
4696 case F2FS_IOC_DECOMPRESS_FILE:
4697 return f2fs_ioc_decompress_file(filp);
4698 case F2FS_IOC_COMPRESS_FILE:
4699 return f2fs_ioc_compress_file(filp);
4700 case F2FS_IOC_GET_DEV_ALIAS_FILE:
4701 return f2fs_ioc_get_dev_alias_file(filp, arg);
4702 case F2FS_IOC_IO_PRIO:
4703 return f2fs_ioc_io_prio(filp, arg);
4704 default:
4705 return -ENOTTY;
4706 }
4707 }
4708
4709 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
4710 {
4711 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
4712 return -EIO;
4713 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
4714 return -ENOSPC;
4715
4716 return __f2fs_ioctl(filp, cmd, arg);
4717 }
4718
4719 /*
4720 * Return %true if the given read or write request should use direct I/O, or
4721 * %false if it should use buffered I/O.
4722 */
4723 static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
4724 struct iov_iter *iter)
4725 {
4726 unsigned int align;
4727
4728 if (!(iocb->ki_flags & IOCB_DIRECT))
4729 return false;
4730
4731 if (f2fs_force_buffered_io(inode, iov_iter_rw(iter)))
4732 return false;
4733
4734 /*
4735 * Direct I/O not aligned to the disk's logical_block_size will be
4736 * attempted, but will fail with -EINVAL.
4737 *
4738 * f2fs additionally requires that direct I/O be aligned to the
4739 * filesystem block size, which is often a stricter requirement.
4740 * However, f2fs traditionally falls back to buffered I/O on requests
4741 * that are logical_block_size-aligned but not fs-block aligned.
4742 *
4743 * The below logic implements this behavior.
4744 */
4745 align = iocb->ki_pos | iov_iter_alignment(iter);
4746 if (!IS_ALIGNED(align, i_blocksize(inode)) &&
4747 IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
4748 return false;
4749
4750 return true;
4751 }
4752
4753 static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
4754 unsigned int flags)
4755 {
4756 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
4757
4758 dec_page_count(sbi, F2FS_DIO_READ);
4759 if (error)
4760 return error;
4761 f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size);
4762 return 0;
4763 }
4764
4765 static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
4766 .end_io = f2fs_dio_read_end_io,
4767 };
4768
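/*
 * Direct read through iomap.  The F2FS_DIO_READ counter is held across the
 * I/O and GC is excluded via i_gc_rwsem[READ] while the request is submitted.
 */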
4769 static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
4770 {
4771 struct file *file = iocb->ki_filp;
4772 struct inode *inode = file_inode(file);
4773 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4774 struct f2fs_inode_info *fi = F2FS_I(inode);
4775 const loff_t pos = iocb->ki_pos;
4776 const size_t count = iov_iter_count(to);
4777 struct iomap_dio *dio;
4778 ssize_t ret;
4779
4780 if (count == 0)
4781 return 0; /* skip atime update */
4782
4783 trace_f2fs_direct_IO_enter(inode, iocb, count, READ);
4784
4785 if (iocb->ki_flags & IOCB_NOWAIT) {
4786 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
4787 ret = -EAGAIN;
4788 goto out;
4789 }
4790 } else {
4791 f2fs_down_read(&fi->i_gc_rwsem[READ]);
4792 }
4793
4794 /* dio is not compatible w/ atomic file */
4795 if (f2fs_is_atomic_file(inode)) {
4796 f2fs_up_read(&fi->i_gc_rwsem[READ]);
4797 ret = -EOPNOTSUPP;
4798 goto out;
4799 }
4800
4801 /*
4802 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
4803 * the higher-level function iomap_dio_rw() in order to ensure that the
4804 * F2FS_DIO_READ counter will be decremented correctly in all cases.
4805 */
4806 inc_page_count(sbi, F2FS_DIO_READ);
4807 dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
4808 &f2fs_iomap_dio_read_ops, 0, NULL, 0);
4809 if (IS_ERR_OR_NULL(dio)) {
4810 ret = PTR_ERR_OR_ZERO(dio);
4811 if (ret != -EIOCBQUEUED)
4812 dec_page_count(sbi, F2FS_DIO_READ);
4813 } else {
4814 ret = iomap_dio_complete(dio);
4815 }
4816
4817 f2fs_up_read(&fi->i_gc_rwsem[READ]);
4818
4819 file_accessed(file);
4820 out:
4821 trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
4822 return ret;
4823 }
4824
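/* Emit a dataread/datawrite start tracepoint that includes the file path. */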
4825 static void f2fs_trace_rw_file_path(struct file *file, loff_t pos, size_t count,
4826 int rw)
4827 {
4828 struct inode *inode = file_inode(file);
4829 char *buf, *path;
4830
4831 buf = f2fs_getname(F2FS_I_SB(inode));
4832 if (!buf)
4833 return;
4834 path = dentry_path_raw(file_dentry(file), buf, PATH_MAX);
4835 if (IS_ERR(path))
4836 goto free_buf;
4837 if (rw == WRITE)
4838 trace_f2fs_datawrite_start(inode, pos, count,
4839 current->pid, path, current->comm);
4840 else
4841 trace_f2fs_dataread_start(inode, pos, count,
4842 current->pid, path, current->comm);
4843 free_buf:
4844 f2fs_putname(buf);
4845 }
4846
4847 static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
4848 {
4849 struct inode *inode = file_inode(iocb->ki_filp);
4850 const loff_t pos = iocb->ki_pos;
4851 ssize_t ret;
4852 bool dio;
4853
4854 if (!f2fs_is_compress_backend_ready(inode))
4855 return -EOPNOTSUPP;
4856
4857 if (trace_f2fs_dataread_start_enabled())
4858 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
4859 iov_iter_count(to), READ);
4860
4861 dio = f2fs_should_use_dio(inode, iocb, to);
4862
4863 /* In LFS mode, if there is inflight dio, wait for its completion */
4864 if (f2fs_lfs_mode(F2FS_I_SB(inode)) &&
4865 get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) &&
4866 (!f2fs_is_pinned_file(inode) || !dio))
4867 inode_dio_wait(inode);
4868
4869 if (dio) {
4870 ret = f2fs_dio_read_iter(iocb, to);
4871 } else {
4872 ret = filemap_read(iocb, to, 0);
4873 if (ret > 0)
4874 f2fs_update_iostat(F2FS_I_SB(inode), inode,
4875 APP_BUFFERED_READ_IO, ret);
4876 }
4877 trace_f2fs_dataread_end(inode, pos, ret);
4878 return ret;
4879 }
4880
4881 static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos,
4882 struct pipe_inode_info *pipe,
4883 size_t len, unsigned int flags)
4884 {
4885 struct inode *inode = file_inode(in);
4886 const loff_t pos = *ppos;
4887 ssize_t ret;
4888
4889 if (!f2fs_is_compress_backend_ready(inode))
4890 return -EOPNOTSUPP;
4891
4892 if (trace_f2fs_dataread_start_enabled())
4893 f2fs_trace_rw_file_path(in, pos, len, READ);
4894
4895 ret = filemap_splice_read(in, ppos, pipe, len, flags);
4896 if (ret > 0)
4897 f2fs_update_iostat(F2FS_I_SB(inode), inode,
4898 APP_BUFFERED_READ_IO, ret);
4899
4900 trace_f2fs_dataread_end(inode, pos, ret);
4901 return ret;
4902 }
4903
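/*
 * Common pre-write checks: reject immutable and compress-released inodes, run
 * generic_write_checks() and file_modified(), and zero any post-EOF page cache
 * the write is about to expose.  Returns the byte count to write or an error.
 */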
4904 static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
4905 {
4906 struct file *file = iocb->ki_filp;
4907 struct inode *inode = file_inode(file);
4908 ssize_t count;
4909 int err;
4910
4911 if (IS_IMMUTABLE(inode))
4912 return -EPERM;
4913
4914 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
4915 return -EPERM;
4916
4917 count = generic_write_checks(iocb, from);
4918 if (count <= 0)
4919 return count;
4920
4921 err = file_modified(file);
4922 if (err)
4923 return err;
4924
4925 f2fs_zero_post_eof_page(inode,
4926 iocb->ki_pos + iov_iter_count(from), true);
4927 return count;
4928 }
4929
4930 /*
4931 * Preallocate blocks for a write request, if it is possible and helpful to do
4932 * so. Returns a positive number if blocks may have been preallocated, 0 if no
4933 * blocks were preallocated, or a negative errno value if something went
4934 * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the
4935 * requested blocks (not just some of them) have been allocated.
4936 */
4937 static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
4938 bool dio)
4939 {
4940 struct inode *inode = file_inode(iocb->ki_filp);
4941 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4942 const loff_t pos = iocb->ki_pos;
4943 const size_t count = iov_iter_count(iter);
4944 struct f2fs_map_blocks map = {};
4945 int flag;
4946 int ret;
4947
4948 /* If it will be an out-of-place direct write, don't bother. */
4949 if (dio && f2fs_lfs_mode(sbi))
4950 return 0;
4951 	/*
4952 	 * Don't preallocate holes aligned to DIO_SKIP_HOLES; DIO falls back
4953 	 * to buffered IO if it meets any holes.
4954 	 */
4955 if (dio && i_size_read(inode) &&
4956 (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
4957 return 0;
4958
4959 /* No-wait I/O can't allocate blocks. */
4960 if (iocb->ki_flags & IOCB_NOWAIT)
4961 return 0;
4962
4963 /* If it will be a short write, don't bother. */
4964 if (fault_in_iov_iter_readable(iter, count))
4965 return 0;
4966
4967 if (f2fs_has_inline_data(inode)) {
4968 /* If the data will fit inline, don't bother. */
4969 if (pos + count <= MAX_INLINE_DATA(inode))
4970 return 0;
4971 ret = f2fs_convert_inline_inode(inode);
4972 if (ret)
4973 return ret;
4974 }
4975
4976 /* Do not preallocate blocks that will be written partially in 4KB. */
4977 map.m_lblk = F2FS_BLK_ALIGN(pos);
4978 map.m_len = F2FS_BYTES_TO_BLK(pos + count);
4979 if (map.m_len > map.m_lblk)
4980 map.m_len -= map.m_lblk;
4981 else
4982 return 0;
4983
4984 if (!IS_DEVICE_ALIASING(inode))
4985 map.m_may_create = true;
4986 if (dio) {
4987 map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi,
4988 inode->i_write_hint);
4989 flag = F2FS_GET_BLOCK_PRE_DIO;
4990 } else {
4991 map.m_seg_type = NO_CHECK_TYPE;
4992 flag = F2FS_GET_BLOCK_PRE_AIO;
4993 }
4994
4995 ret = f2fs_map_blocks(inode, &map, flag);
4996 	/* On -ENOSPC or -EDQUOT it is still fine to report the number of allocated blocks. */
4997 if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0))
4998 return ret;
4999 if (ret == 0)
5000 set_inode_flag(inode, FI_PREALLOCATED_ALL);
5001 return map.m_len;
5002 }
5003
5004 static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
5005 struct iov_iter *from)
5006 {
5007 struct file *file = iocb->ki_filp;
5008 struct inode *inode = file_inode(file);
5009 ssize_t ret;
5010
5011 if (iocb->ki_flags & IOCB_NOWAIT)
5012 return -EOPNOTSUPP;
5013
5014 ret = generic_perform_write(iocb, from);
5015
5016 if (ret > 0) {
5017 f2fs_update_iostat(F2FS_I_SB(inode), inode,
5018 APP_BUFFERED_IO, ret);
5019 }
5020 return ret;
5021 }
5022
5023 static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
5024 unsigned int flags)
5025 {
5026 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
5027
5028 dec_page_count(sbi, F2FS_DIO_WRITE);
5029 if (error)
5030 return error;
5031 f2fs_update_time(sbi, REQ_TIME);
5032 f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size);
5033 return 0;
5034 }
5035
5036 static void f2fs_dio_write_submit_io(const struct iomap_iter *iter,
5037 struct bio *bio, loff_t file_offset)
5038 {
5039 struct inode *inode = iter->inode;
5040 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
5041 enum log_type type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint);
5042 enum temp_type temp = f2fs_get_segment_temp(sbi, type);
5043
5044 bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp);
5045 submit_bio(bio);
5046 }
5047
5048 static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
5049 .end_io = f2fs_dio_write_end_io,
5050 .submit_io = f2fs_dio_write_submit_io,
5051 };
5052
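/*
 * Write back and then drop the page cache for the given range; used after a
 * buffered fallback so that O_DIRECT users do not keep stale cached pages.
 */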
5053 static void f2fs_flush_buffered_write(struct address_space *mapping,
5054 loff_t start_pos, loff_t end_pos)
5055 {
5056 int ret;
5057
5058 ret = filemap_write_and_wait_range(mapping, start_pos, end_pos);
5059 if (ret < 0)
5060 return;
5061 invalidate_mapping_pages(mapping,
5062 start_pos >> PAGE_SHIFT,
5063 end_pos >> PAGE_SHIFT);
5064 }
5065
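/*
 * Direct write through iomap.  In LFS mode the write is out-of-place (do_opu)
 * and additionally takes i_gc_rwsem[READ].  A short direct write falls back to
 * buffered I/O for the remainder, which is then flushed and invalidated to
 * preserve O_DIRECT semantics.
 */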
5066 static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
5067 bool *may_need_sync)
5068 {
5069 struct file *file = iocb->ki_filp;
5070 struct inode *inode = file_inode(file);
5071 struct f2fs_inode_info *fi = F2FS_I(inode);
5072 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
5073 const bool do_opu = f2fs_lfs_mode(sbi);
5074 const loff_t pos = iocb->ki_pos;
5075 const ssize_t count = iov_iter_count(from);
5076 unsigned int dio_flags;
5077 struct iomap_dio *dio;
5078 ssize_t ret;
5079
5080 trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);
5081
5082 if (iocb->ki_flags & IOCB_NOWAIT) {
5083 /* f2fs_convert_inline_inode() and block allocation can block */
5084 if (f2fs_has_inline_data(inode) ||
5085 !f2fs_overwrite_io(inode, pos, count)) {
5086 ret = -EAGAIN;
5087 goto out;
5088 }
5089
5090 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
5091 ret = -EAGAIN;
5092 goto out;
5093 }
5094 if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
5095 f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
5096 ret = -EAGAIN;
5097 goto out;
5098 }
5099 } else {
5100 ret = f2fs_convert_inline_inode(inode);
5101 if (ret)
5102 goto out;
5103
5104 f2fs_down_read(&fi->i_gc_rwsem[WRITE]);
5105 if (do_opu)
5106 f2fs_down_read(&fi->i_gc_rwsem[READ]);
5107 }
5108
5109 /*
5110 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
5111 * the higher-level function iomap_dio_rw() in order to ensure that the
5112 * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
5113 */
5114 inc_page_count(sbi, F2FS_DIO_WRITE);
5115 dio_flags = 0;
5116 if (pos + count > inode->i_size)
5117 dio_flags |= IOMAP_DIO_FORCE_WAIT;
5118 dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
5119 &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
5120 if (IS_ERR_OR_NULL(dio)) {
5121 ret = PTR_ERR_OR_ZERO(dio);
5122 if (ret == -ENOTBLK)
5123 ret = 0;
5124 if (ret != -EIOCBQUEUED)
5125 dec_page_count(sbi, F2FS_DIO_WRITE);
5126 } else {
5127 ret = iomap_dio_complete(dio);
5128 }
5129
5130 if (do_opu)
5131 f2fs_up_read(&fi->i_gc_rwsem[READ]);
5132 f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
5133
5134 if (ret < 0)
5135 goto out;
5136 if (pos + ret > inode->i_size)
5137 f2fs_i_size_write(inode, pos + ret);
5138 if (!do_opu)
5139 set_inode_flag(inode, FI_UPDATE_WRITE);
5140
5141 if (iov_iter_count(from)) {
5142 ssize_t ret2;
5143 loff_t bufio_start_pos = iocb->ki_pos;
5144
5145 /*
5146 * The direct write was partial, so we need to fall back to a
5147 * buffered write for the remainder.
5148 */
5149
5150 ret2 = f2fs_buffered_write_iter(iocb, from);
5151 if (iov_iter_count(from))
5152 f2fs_write_failed(inode, iocb->ki_pos);
5153 if (ret2 < 0)
5154 goto out;
5155
5156 /*
5157 * Ensure that the pagecache pages are written to disk and
5158 * invalidated to preserve the expected O_DIRECT semantics.
5159 */
5160 if (ret2 > 0) {
5161 loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
5162
5163 ret += ret2;
5164
5165 f2fs_flush_buffered_write(file->f_mapping,
5166 bufio_start_pos,
5167 bufio_end_pos);
5168 }
5169 } else {
5170 /* iomap_dio_rw() already handled the generic_write_sync(). */
5171 *may_need_sync = false;
5172 }
5173 out:
5174 trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
5175 return ret;
5176 }
5177
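/*
 * Top-level ->write_iter: run the write checks, choose between direct and
 * buffered I/O, optionally preallocate blocks, perform the write, and drop any
 * preallocated blocks left beyond i_size.
 */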
5178 static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
5179 {
5180 struct inode *inode = file_inode(iocb->ki_filp);
5181 const loff_t orig_pos = iocb->ki_pos;
5182 const size_t orig_count = iov_iter_count(from);
5183 loff_t target_size;
5184 bool dio;
5185 bool may_need_sync = true;
5186 int preallocated;
5187 const loff_t pos = iocb->ki_pos;
5188 const ssize_t count = iov_iter_count(from);
5189 ssize_t ret;
5190
5191 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
5192 ret = -EIO;
5193 goto out;
5194 }
5195
5196 if (!f2fs_is_compress_backend_ready(inode)) {
5197 ret = -EOPNOTSUPP;
5198 goto out;
5199 }
5200
5201 if (iocb->ki_flags & IOCB_NOWAIT) {
5202 if (!inode_trylock(inode)) {
5203 ret = -EAGAIN;
5204 goto out;
5205 }
5206 } else {
5207 inode_lock(inode);
5208 }
5209
5210 if (f2fs_is_pinned_file(inode) &&
5211 !f2fs_overwrite_io(inode, pos, count)) {
5212 ret = -EIO;
5213 goto out_unlock;
5214 }
5215
5216 ret = f2fs_write_checks(iocb, from);
5217 if (ret <= 0)
5218 goto out_unlock;
5219
5220 /* Determine whether we will do a direct write or a buffered write. */
5221 dio = f2fs_should_use_dio(inode, iocb, from);
5222
5223 /* dio is not compatible w/ atomic write */
5224 if (dio && f2fs_is_atomic_file(inode)) {
5225 ret = -EOPNOTSUPP;
5226 goto out_unlock;
5227 }
5228
5229 /* Possibly preallocate the blocks for the write. */
5230 target_size = iocb->ki_pos + iov_iter_count(from);
5231 preallocated = f2fs_preallocate_blocks(iocb, from, dio);
5232 if (preallocated < 0) {
5233 ret = preallocated;
5234 } else {
5235 if (trace_f2fs_datawrite_start_enabled())
5236 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
5237 orig_count, WRITE);
5238
5239 /* Do the actual write. */
5240 ret = dio ?
5241 f2fs_dio_write_iter(iocb, from, &may_need_sync) :
5242 f2fs_buffered_write_iter(iocb, from);
5243
5244 trace_f2fs_datawrite_end(inode, orig_pos, ret);
5245 }
5246
5247 /* Don't leave any preallocated blocks around past i_size. */
5248 if (preallocated && i_size_read(inode) < target_size) {
5249 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
5250 filemap_invalidate_lock(inode->i_mapping);
5251 if (!f2fs_truncate(inode))
5252 file_dont_truncate(inode);
5253 filemap_invalidate_unlock(inode->i_mapping);
5254 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
5255 } else {
5256 file_dont_truncate(inode);
5257 }
5258
5259 clear_inode_flag(inode, FI_PREALLOCATED_ALL);
5260 out_unlock:
5261 inode_unlock(inode);
5262 out:
5263 trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
5264
5265 if (ret > 0 && may_need_sync)
5266 ret = generic_write_sync(iocb, ret);
5267
5268 	/* If buffered IO was forced, flush and drop the data from
5269 	 * the page cache to preserve O_DIRECT semantics.
5270 	 */
5271 if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT))
5272 f2fs_flush_buffered_write(iocb->ki_filp->f_mapping,
5273 orig_pos,
5274 orig_pos + ret - 1);
5275
5276 return ret;
5277 }
5278
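/*
 * ->fadvise: scale readahead by seq_file_ra_mul for POSIX_FADV_SEQUENTIAL,
 * preload the extent cache on the first POSIX_FADV_WILLNEED, and invalidate
 * the compress cache on POSIX_FADV_DONTNEED for compressed files.
 */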
5279 static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
5280 int advice)
5281 {
5282 struct address_space *mapping;
5283 struct backing_dev_info *bdi;
5284 struct inode *inode = file_inode(filp);
5285 int err;
5286
5287 if (advice == POSIX_FADV_SEQUENTIAL) {
5288 if (S_ISFIFO(inode->i_mode))
5289 return -ESPIPE;
5290
5291 mapping = filp->f_mapping;
5292 if (!mapping || len < 0)
5293 return -EINVAL;
5294
5295 bdi = inode_to_bdi(mapping->host);
5296 filp->f_ra.ra_pages = bdi->ra_pages *
5297 F2FS_I_SB(inode)->seq_file_ra_mul;
5298 spin_lock(&filp->f_lock);
5299 filp->f_mode &= ~FMODE_RANDOM;
5300 spin_unlock(&filp->f_lock);
5301 return 0;
5302 } else if (advice == POSIX_FADV_WILLNEED && offset == 0) {
5303 /* Load extent cache at the first readahead. */
5304 f2fs_precache_extents(inode);
5305 }
5306
5307 err = generic_fadvise(filp, offset, len, advice);
5308 if (err)
5309 return err;
5310
5311 if (advice == POSIX_FADV_DONTNEED &&
5312 (test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
5313 f2fs_compressed_file(inode)))
5314 f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino);
5315 else if (advice == POSIX_FADV_NOREUSE)
5316 err = f2fs_keep_noreuse_range(inode, offset, len);
5317 return err;
5318 }
5319
5320 #ifdef CONFIG_COMPAT
5321 struct compat_f2fs_gc_range {
5322 u32 sync;
5323 compat_u64 start;
5324 compat_u64 len;
5325 };
5326 #define F2FS_IOC32_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11,\
5327 struct compat_f2fs_gc_range)
5328
5329 static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg)
5330 {
5331 struct compat_f2fs_gc_range __user *urange;
5332 struct f2fs_gc_range range;
5333 int err;
5334
5335 urange = compat_ptr(arg);
5336 err = get_user(range.sync, &urange->sync);
5337 err |= get_user(range.start, &urange->start);
5338 err |= get_user(range.len, &urange->len);
5339 if (err)
5340 return -EFAULT;
5341
5342 return __f2fs_ioc_gc_range(file, &range);
5343 }
5344
5345 struct compat_f2fs_move_range {
5346 u32 dst_fd;
5347 compat_u64 pos_in;
5348 compat_u64 pos_out;
5349 compat_u64 len;
5350 };
5351 #define F2FS_IOC32_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \
5352 struct compat_f2fs_move_range)
5353
5354 static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg)
5355 {
5356 struct compat_f2fs_move_range __user *urange;
5357 struct f2fs_move_range range;
5358 int err;
5359
5360 urange = compat_ptr(arg);
5361 err = get_user(range.dst_fd, &urange->dst_fd);
5362 err |= get_user(range.pos_in, &urange->pos_in);
5363 err |= get_user(range.pos_out, &urange->pos_out);
5364 err |= get_user(range.len, &urange->len);
5365 if (err)
5366 return -EFAULT;
5367
5368 return __f2fs_ioc_move_range(file, &range);
5369 }
5370
5371 long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
5372 {
5373 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
5374 return -EIO;
5375 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file))))
5376 return -ENOSPC;
5377
5378 switch (cmd) {
5379 case FS_IOC32_GETVERSION:
5380 cmd = FS_IOC_GETVERSION;
5381 break;
5382 case F2FS_IOC32_GARBAGE_COLLECT_RANGE:
5383 return f2fs_compat_ioc_gc_range(file, arg);
5384 case F2FS_IOC32_MOVE_RANGE:
5385 return f2fs_compat_ioc_move_range(file, arg);
5386 case F2FS_IOC_START_ATOMIC_WRITE:
5387 case F2FS_IOC_START_ATOMIC_REPLACE:
5388 case F2FS_IOC_COMMIT_ATOMIC_WRITE:
5389 case F2FS_IOC_START_VOLATILE_WRITE:
5390 case F2FS_IOC_RELEASE_VOLATILE_WRITE:
5391 case F2FS_IOC_ABORT_ATOMIC_WRITE:
5392 case F2FS_IOC_SHUTDOWN:
5393 case FITRIM:
5394 case FS_IOC_SET_ENCRYPTION_POLICY:
5395 case FS_IOC_GET_ENCRYPTION_PWSALT:
5396 case FS_IOC_GET_ENCRYPTION_POLICY:
5397 case FS_IOC_GET_ENCRYPTION_POLICY_EX:
5398 case FS_IOC_ADD_ENCRYPTION_KEY:
5399 case FS_IOC_REMOVE_ENCRYPTION_KEY:
5400 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
5401 case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
5402 case FS_IOC_GET_ENCRYPTION_NONCE:
5403 case F2FS_IOC_GARBAGE_COLLECT:
5404 case F2FS_IOC_WRITE_CHECKPOINT:
5405 case F2FS_IOC_DEFRAGMENT:
5406 case F2FS_IOC_FLUSH_DEVICE:
5407 case F2FS_IOC_GET_FEATURES:
5408 case F2FS_IOC_GET_PIN_FILE:
5409 case F2FS_IOC_SET_PIN_FILE:
5410 case F2FS_IOC_PRECACHE_EXTENTS:
5411 case F2FS_IOC_RESIZE_FS:
5412 case FS_IOC_ENABLE_VERITY:
5413 case FS_IOC_MEASURE_VERITY:
5414 case FS_IOC_READ_VERITY_METADATA:
5415 case FS_IOC_GETFSLABEL:
5416 case FS_IOC_SETFSLABEL:
5417 case F2FS_IOC_GET_COMPRESS_BLOCKS:
5418 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
5419 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
5420 case F2FS_IOC_SEC_TRIM_FILE:
5421 case F2FS_IOC_GET_COMPRESS_OPTION:
5422 case F2FS_IOC_SET_COMPRESS_OPTION:
5423 case F2FS_IOC_DECOMPRESS_FILE:
5424 case F2FS_IOC_COMPRESS_FILE:
5425 case F2FS_IOC_GET_DEV_ALIAS_FILE:
5426 case F2FS_IOC_IO_PRIO:
5427 break;
5428 default:
5429 return -ENOIOCTLCMD;
5430 }
5431 return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
5432 }
5433 #endif
5434
5435 const struct file_operations f2fs_file_operations = {
5436 .llseek = f2fs_llseek,
5437 .read_iter = f2fs_file_read_iter,
5438 .write_iter = f2fs_file_write_iter,
5439 .iopoll = iocb_bio_iopoll,
5440 .open = f2fs_file_open,
5441 .release = f2fs_release_file,
5442 .mmap_prepare = f2fs_file_mmap_prepare,
5443 .flush = f2fs_file_flush,
5444 .fsync = f2fs_sync_file,
5445 .fallocate = f2fs_fallocate,
5446 .unlocked_ioctl = f2fs_ioctl,
5447 #ifdef CONFIG_COMPAT
5448 .compat_ioctl = f2fs_compat_ioctl,
5449 #endif
5450 .splice_read = f2fs_file_splice_read,
5451 .splice_write = iter_file_splice_write,
5452 .fadvise = f2fs_file_fadvise,
5453 .fop_flags = FOP_BUFFER_RASYNC,
5454 };
5455