1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * fs/f2fs/file.c
4 *
5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6 * http://www.samsung.com/
7 */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/stat.h>
11 #include <linux/writeback.h>
12 #include <linux/blkdev.h>
13 #include <linux/falloc.h>
14 #include <linux/types.h>
15 #include <linux/compat.h>
16 #include <linux/uaccess.h>
17 #include <linux/mount.h>
18 #include <linux/pagevec.h>
19 #include <linux/uio.h>
20 #include <linux/uuid.h>
21 #include <linux/file.h>
22 #include <linux/nls.h>
23 #include <linux/sched/signal.h>
24 #include <linux/fileattr.h>
25 #include <linux/fadvise.h>
26 #include <linux/iomap.h>
27
28 #include "f2fs.h"
29 #include "node.h"
30 #include "segment.h"
31 #include "xattr.h"
32 #include "acl.h"
33 #include "gc.h"
34 #include "iostat.h"
35 #include <trace/events/f2fs.h>
36 #include <uapi/linux/f2fs.h>
37
38 static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size)
39 {
40 loff_t old_size = i_size_read(inode);
41
42 if (old_size >= new_size)
43 return;
44
45 /* zero or drop pages only in the range [old_size, new_size] */
46 truncate_pagecache(inode, old_size);
47 }
48
49 static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
50 {
51 struct inode *inode = file_inode(vmf->vma->vm_file);
52 vm_flags_t flags = vmf->vma->vm_flags;
53 vm_fault_t ret;
54
55 ret = filemap_fault(vmf);
56 if (ret & VM_FAULT_LOCKED)
57 f2fs_update_iostat(F2FS_I_SB(inode), inode,
58 APP_MAPPED_READ_IO, F2FS_BLKSIZE);
59
60 trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret);
61
62 return ret;
63 }
64
65 static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
66 {
67 struct folio *folio = page_folio(vmf->page);
68 struct inode *inode = file_inode(vmf->vma->vm_file);
69 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
70 struct dnode_of_data dn;
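/* a pinned file must already have its blocks allocated, so skip block allocation */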
71 bool need_alloc = !f2fs_is_pinned_file(inode);
72 int err = 0;
73 vm_fault_t ret;
74
75 if (unlikely(IS_IMMUTABLE(inode)))
76 return VM_FAULT_SIGBUS;
77
78 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
79 err = -EIO;
80 goto out;
81 }
82
83 if (unlikely(f2fs_cp_error(sbi))) {
84 err = -EIO;
85 goto out;
86 }
87
88 if (!f2fs_is_checkpoint_ready(sbi)) {
89 err = -ENOSPC;
90 goto out;
91 }
92
93 err = f2fs_convert_inline_inode(inode);
94 if (err)
95 goto out;
96
97 #ifdef CONFIG_F2FS_FS_COMPRESSION
98 if (f2fs_compressed_file(inode)) {
99 int ret = f2fs_is_compressed_cluster(inode, folio->index);
100
101 if (ret < 0) {
102 err = ret;
103 goto out;
104 } else if (ret) {
105 need_alloc = false;
106 }
107 }
108 #endif
109 /* this should be done outside of any locked page */
110 if (need_alloc)
111 f2fs_balance_fs(sbi, true);
112
113 sb_start_pagefault(inode->i_sb);
114
115 f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
116
117 filemap_invalidate_lock(inode->i_mapping);
118 f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT);
119 filemap_invalidate_unlock(inode->i_mapping);
120
121 file_update_time(vmf->vma->vm_file);
122 filemap_invalidate_lock_shared(inode->i_mapping);
123
124 folio_lock(folio);
125 if (unlikely(folio->mapping != inode->i_mapping ||
126 folio_pos(folio) > i_size_read(inode) ||
127 !folio_test_uptodate(folio))) {
128 folio_unlock(folio);
129 err = -EFAULT;
130 goto out_sem;
131 }
132
133 set_new_dnode(&dn, inode, NULL, NULL, 0);
134 if (need_alloc) {
135 /* block allocation */
136 err = f2fs_get_block_locked(&dn, folio->index);
137 } else {
138 err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE);
139 f2fs_put_dnode(&dn);
140 if (f2fs_is_pinned_file(inode) &&
141 !__is_valid_data_blkaddr(dn.data_blkaddr))
142 err = -EIO;
143 }
144
145 if (err) {
146 folio_unlock(folio);
147 goto out_sem;
148 }
149
150 f2fs_folio_wait_writeback(folio, DATA, false, true);
151
152 /* wait for GCed page writeback via META_MAPPING */
153 f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
154
155 /*
156 * check to see if the page is mapped already (no holes)
157 */
158 if (folio_test_mappedtodisk(folio))
159 goto out_sem;
160
161 /* page is wholly or partially inside EOF */
162 if (((loff_t)(folio->index + 1) << PAGE_SHIFT) >
163 i_size_read(inode)) {
164 loff_t offset;
165
166 offset = i_size_read(inode) & ~PAGE_MASK;
167 folio_zero_segment(folio, offset, folio_size(folio));
168 }
169 folio_mark_dirty(folio);
170
171 f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE);
172 f2fs_update_time(sbi, REQ_TIME);
173
174 out_sem:
175 filemap_invalidate_unlock_shared(inode->i_mapping);
176
177 sb_end_pagefault(inode->i_sb);
178 out:
179 ret = vmf_fs_error(err);
180
181 trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret);
182 return ret;
183 }
184
185 static const struct vm_operations_struct f2fs_file_vm_ops = {
186 .fault = f2fs_filemap_fault,
187 .map_pages = filemap_map_pages,
188 .page_mkwrite = f2fs_vm_page_mkwrite,
189 };
190
191 static int get_parent_ino(struct inode *inode, nid_t *pino)
192 {
193 struct dentry *dentry;
194
195 /*
196 * Make sure to get the non-deleted alias. The alias associated with
197 * the open file descriptor being fsync()'ed may be deleted already.
198 */
199 dentry = d_find_alias(inode);
200 if (!dentry)
201 return 0;
202
203 *pino = d_parent_ino(dentry);
204 dput(dentry);
205 return 1;
206 }
207
208 static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
209 {
210 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
211 enum cp_reason_type cp_reason = CP_NO_NEEDED;
212
213 if (!S_ISREG(inode->i_mode))
214 cp_reason = CP_NON_REGULAR;
215 else if (f2fs_compressed_file(inode))
216 cp_reason = CP_COMPRESSED;
217 else if (inode->i_nlink != 1)
218 cp_reason = CP_HARDLINK;
219 else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
220 cp_reason = CP_SB_NEED_CP;
221 else if (file_wrong_pino(inode))
222 cp_reason = CP_WRONG_PINO;
223 else if (!f2fs_space_for_roll_forward(sbi))
224 cp_reason = CP_NO_SPC_ROLL;
225 else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
226 cp_reason = CP_NODE_NEED_CP;
227 else if (test_opt(sbi, FASTBOOT))
228 cp_reason = CP_FASTBOOT_MODE;
229 else if (F2FS_OPTION(sbi).active_logs == 2)
230 cp_reason = CP_SPEC_LOG_NUM;
231 else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT &&
232 f2fs_need_dentry_mark(sbi, inode->i_ino) &&
233 f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
234 TRANS_DIR_INO))
235 cp_reason = CP_RECOVER_DIR;
236 else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
237 XATTR_DIR_INO))
238 cp_reason = CP_XATTR_DIR;
239
240 return cp_reason;
241 }
242
243 static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
244 {
245 struct folio *i = filemap_get_folio(NODE_MAPPING(sbi), ino);
246 bool ret = false;
247 /* But we still need to check whether there are pending inode updates */
248 if ((!IS_ERR(i) && folio_test_dirty(i)) ||
249 f2fs_need_inode_block_update(sbi, ino))
250 ret = true;
251 f2fs_folio_put(i, false);
252 return ret;
253 }
254
255 static void try_to_fix_pino(struct inode *inode)
256 {
257 struct f2fs_inode_info *fi = F2FS_I(inode);
258 nid_t pino;
259
260 f2fs_down_write(&fi->i_sem);
261 if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
262 get_parent_ino(inode, &pino)) {
263 f2fs_i_pino_write(inode, pino);
264 file_got_pino(inode);
265 }
266 f2fs_up_write(&fi->i_sem);
267 }
268
269 static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
270 int datasync, bool atomic)
271 {
272 struct inode *inode = file->f_mapping->host;
273 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
274 nid_t ino = inode->i_ino;
275 int ret = 0;
276 enum cp_reason_type cp_reason = 0;
277 struct writeback_control wbc = {
278 .sync_mode = WB_SYNC_ALL,
279 .nr_to_write = LONG_MAX,
280 };
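/* seq_id records the last node write issued, so we can wait for its writeback later */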
281 unsigned int seq_id = 0;
282
283 if (unlikely(f2fs_readonly(inode->i_sb)))
284 return 0;
285
286 trace_f2fs_sync_file_enter(inode);
287
288 if (S_ISDIR(inode->i_mode))
289 goto go_write;
290
291 /* if fdatasync is triggered, let's do in-place-update */
292 if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
293 set_inode_flag(inode, FI_NEED_IPU);
294 ret = file_write_and_wait_range(file, start, end);
295 clear_inode_flag(inode, FI_NEED_IPU);
296
297 if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
298 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
299 return ret;
300 }
301
302 /* if the inode is dirty, always write it out so it can be recovered */
303 if (!f2fs_skip_inode_update(inode, datasync)) {
304 f2fs_write_inode(inode, NULL);
305 goto go_write;
306 }
307
308 /*
309 * if there is no written data, don't waste time writing recovery info.
310 */
311 if (!is_inode_flag_set(inode, FI_APPEND_WRITE) &&
312 !f2fs_exist_written_data(sbi, ino, APPEND_INO)) {
313
314 /* it may call write_inode just prior to fsync */
315 if (need_inode_page_update(sbi, ino))
316 goto go_write;
317
318 if (is_inode_flag_set(inode, FI_UPDATE_WRITE) ||
319 f2fs_exist_written_data(sbi, ino, UPDATE_INO))
320 goto flush_out;
321 goto out;
322 } else {
323 /*
324 * In the OPU case, during fsync(), a node block can be persisted before
325 * its data when the lower device doesn't support write barriers,
326 * resulting in data corruption after SPO.
327 * So for strict fsync mode, force atomic write semantics to keep the
328 * write order between data/node and the last node, to avoid
329 * potential data corruption.
330 */
331 if (F2FS_OPTION(sbi).fsync_mode ==
332 FSYNC_MODE_STRICT && !atomic)
333 atomic = true;
334 }
335 go_write:
336 /*
337 * Both fdatasync() and fsync() can be recovered from a sudden
338 * power-off.
339 */
340 f2fs_down_read(&F2FS_I(inode)->i_sem);
341 cp_reason = need_do_checkpoint(inode);
342 f2fs_up_read(&F2FS_I(inode)->i_sem);
343
344 if (cp_reason) {
345 /* all the dirty node pages should be flushed for POR */
346 ret = f2fs_sync_fs(inode->i_sb, 1);
347
348 /*
349 * We've secured consistency through sync_fs. The following pino
350 * will be used only for fsynced inodes after the checkpoint.
351 */
352 try_to_fix_pino(inode);
353 clear_inode_flag(inode, FI_APPEND_WRITE);
354 clear_inode_flag(inode, FI_UPDATE_WRITE);
355 goto out;
356 }
357 sync_nodes:
358 atomic_inc(&sbi->wb_sync_req[NODE]);
359 ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
360 atomic_dec(&sbi->wb_sync_req[NODE]);
361 if (ret)
362 goto out;
363
364 /* if cp_error was enabled, we should avoid an infinite loop */
365 if (unlikely(f2fs_cp_error(sbi))) {
366 ret = -EIO;
367 goto out;
368 }
369
370 if (f2fs_need_inode_block_update(sbi, ino)) {
371 f2fs_mark_inode_dirty_sync(inode, true);
372 f2fs_write_inode(inode, NULL);
373 goto sync_nodes;
374 }
375
376 /*
377 * If it's an atomic write, write ordering is already guaranteed. So here
378 * we don't need to wait for node write completion, since we use the
379 * node chain which serializes node blocks. If one of the node writes is
380 * reordered, we simply see a broken chain, which stops
381 * roll-forward recovery. It means we'll recover either all or none of the
382 * node blocks given the fsync mark.
383 */
384 if (!atomic) {
385 ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
386 if (ret)
387 goto out;
388 }
389
390 /* once recovery info is written, we don't need to track this */
391 f2fs_remove_ino_entry(sbi, ino, APPEND_INO);
392 clear_inode_flag(inode, FI_APPEND_WRITE);
393 flush_out:
394 if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
395 ret = f2fs_issue_flush(sbi, inode->i_ino);
396 if (!ret) {
397 f2fs_remove_ino_entry(sbi, ino, UPDATE_INO);
398 clear_inode_flag(inode, FI_UPDATE_WRITE);
399 f2fs_remove_ino_entry(sbi, ino, FLUSH_INO);
400 }
401 f2fs_update_time(sbi, REQ_TIME);
402 out:
403 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
404 return ret;
405 }
406
407 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
408 {
409 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
410 return -EIO;
411 return f2fs_do_sync_file(file, start, end, datasync, false);
412 }
413
414 static bool __found_offset(struct address_space *mapping,
415 struct dnode_of_data *dn, pgoff_t index, int whence)
416 {
417 block_t blkaddr = f2fs_data_blkaddr(dn);
418 struct inode *inode = mapping->host;
419 bool compressed_cluster = false;
420
421 if (f2fs_compressed_file(inode)) {
422 block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_folio,
423 ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size));
424
425 compressed_cluster = first_blkaddr == COMPRESS_ADDR;
426 }
427
428 switch (whence) {
429 case SEEK_DATA:
430 if (__is_valid_data_blkaddr(blkaddr))
431 return true;
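/* a reserved block (NEW_ADDR) counts as data only if its page is dirty in the cache */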
432 if (blkaddr == NEW_ADDR &&
433 xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY))
434 return true;
435 if (compressed_cluster)
436 return true;
437 break;
438 case SEEK_HOLE:
439 if (compressed_cluster)
440 return false;
441 if (blkaddr == NULL_ADDR)
442 return true;
443 break;
444 }
445 return false;
446 }
447
448 static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
449 {
450 struct inode *inode = file->f_mapping->host;
451 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
452 struct dnode_of_data dn;
453 pgoff_t pgofs, end_offset;
454 loff_t data_ofs = offset;
455 loff_t isize;
456 int err = 0;
457
458 inode_lock_shared(inode);
459
460 isize = i_size_read(inode);
461 if (offset >= isize)
462 goto fail;
463
464 /* handle inline data case */
465 if (f2fs_has_inline_data(inode)) {
466 if (whence == SEEK_HOLE) {
467 data_ofs = isize;
468 goto found;
469 } else if (whence == SEEK_DATA) {
470 data_ofs = offset;
471 goto found;
472 }
473 }
474
475 pgofs = (pgoff_t)(offset >> PAGE_SHIFT);
476
477 for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
478 set_new_dnode(&dn, inode, NULL, NULL, 0);
479 err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
480 if (err && err != -ENOENT) {
481 goto fail;
482 } else if (err == -ENOENT) {
483 /* direct node does not exist */
484 if (whence == SEEK_DATA) {
485 pgofs = f2fs_get_next_page_offset(&dn, pgofs);
486 continue;
487 } else {
488 goto found;
489 }
490 }
491
492 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
493
494 /* find data/hole in dnode block */
495 for (; dn.ofs_in_node < end_offset;
496 dn.ofs_in_node++, pgofs++,
497 data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
498 block_t blkaddr;
499
500 blkaddr = f2fs_data_blkaddr(&dn);
501
502 if (__is_valid_data_blkaddr(blkaddr) &&
503 !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
504 blkaddr, DATA_GENERIC_ENHANCE)) {
505 f2fs_put_dnode(&dn);
506 goto fail;
507 }
508
509 if (__found_offset(file->f_mapping, &dn,
510 pgofs, whence)) {
511 f2fs_put_dnode(&dn);
512 goto found;
513 }
514 }
515 f2fs_put_dnode(&dn);
516 }
517
518 if (whence == SEEK_DATA)
519 goto fail;
520 found:
521 if (whence == SEEK_HOLE && data_ofs > isize)
522 data_ofs = isize;
523 inode_unlock_shared(inode);
524 return vfs_setpos(file, data_ofs, maxbytes);
525 fail:
526 inode_unlock_shared(inode);
527 return -ENXIO;
528 }
529
530 static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
531 {
532 struct inode *inode = file->f_mapping->host;
533 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
534
535 switch (whence) {
536 case SEEK_SET:
537 case SEEK_CUR:
538 case SEEK_END:
539 return generic_file_llseek_size(file, offset, whence,
540 maxbytes, i_size_read(inode));
541 case SEEK_DATA:
542 case SEEK_HOLE:
543 if (offset < 0)
544 return -ENXIO;
545 return f2fs_seek_block(file, offset, whence);
546 }
547
548 return -EINVAL;
549 }
550
551 static int f2fs_file_mmap_prepare(struct vm_area_desc *desc)
552 {
553 struct file *file = desc->file;
554 struct inode *inode = file_inode(file);
555
556 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
557 return -EIO;
558
559 if (!f2fs_is_compress_backend_ready(inode))
560 return -EOPNOTSUPP;
561
562 file_accessed(file);
563 desc->vm_ops = &f2fs_file_vm_ops;
564
565 f2fs_down_read(&F2FS_I(inode)->i_sem);
566 set_inode_flag(inode, FI_MMAP_FILE);
567 f2fs_up_read(&F2FS_I(inode)->i_sem);
568
569 return 0;
570 }
571
572 static int finish_preallocate_blocks(struct inode *inode)
573 {
574 int ret = 0;
575 bool opened;
576
577 f2fs_down_read(&F2FS_I(inode)->i_sem);
578 opened = is_inode_flag_set(inode, FI_OPENED_FILE);
579 f2fs_up_read(&F2FS_I(inode)->i_sem);
580 if (opened)
581 return 0;
582
583 inode_lock(inode);
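/* recheck under inode_lock in case another opener already completed this */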
584 if (is_inode_flag_set(inode, FI_OPENED_FILE))
585 goto out_unlock;
586
587 if (!file_should_truncate(inode))
588 goto out_update;
589
590 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
591 filemap_invalidate_lock(inode->i_mapping);
592
593 truncate_setsize(inode, i_size_read(inode));
594 ret = f2fs_truncate(inode);
595
596 filemap_invalidate_unlock(inode->i_mapping);
597 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
598 if (ret)
599 goto out_unlock;
600
601 file_dont_truncate(inode);
602 out_update:
603 f2fs_down_write(&F2FS_I(inode)->i_sem);
604 set_inode_flag(inode, FI_OPENED_FILE);
605 f2fs_up_write(&F2FS_I(inode)->i_sem);
606 out_unlock:
607 inode_unlock(inode);
608 return ret;
609 }
610
611 static int f2fs_file_open(struct inode *inode, struct file *filp)
612 {
613 int err = fscrypt_file_open(inode, filp);
614
615 if (err)
616 return err;
617
618 if (!f2fs_is_compress_backend_ready(inode))
619 return -EOPNOTSUPP;
620
621 err = fsverity_file_open(inode, filp);
622 if (err)
623 return err;
624
625 filp->f_mode |= FMODE_NOWAIT;
626 filp->f_mode |= FMODE_CAN_ODIRECT;
627
628 err = dquot_file_open(inode, filp);
629 if (err)
630 return err;
631
632 err = finish_preallocate_blocks(inode);
633 if (!err)
634 atomic_inc(&F2FS_I(inode)->open_count);
635 return err;
636 }
637
638 void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
639 {
640 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
641 int nr_free = 0, ofs = dn->ofs_in_node, len = count;
642 __le32 *addr;
643 bool compressed_cluster = false;
644 int cluster_index = 0, valid_blocks = 0;
645 int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
646 bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks);
647 block_t blkstart;
648 int blklen = 0;
649
650 addr = get_dnode_addr(dn->inode, dn->node_folio) + ofs;
651 blkstart = le32_to_cpu(*addr);
652
653 /* Assumption: truncation starts at a cluster boundary */
654 for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) {
655 block_t blkaddr = le32_to_cpu(*addr);
656
657 if (f2fs_compressed_file(dn->inode) &&
658 !(cluster_index & (cluster_size - 1))) {
659 if (compressed_cluster)
660 f2fs_i_compr_blocks_update(dn->inode,
661 valid_blocks, false);
662 compressed_cluster = (blkaddr == COMPRESS_ADDR);
663 valid_blocks = 0;
664 }
665
666 if (blkaddr == NULL_ADDR)
667 goto next;
668
669 f2fs_set_data_blkaddr(dn, NULL_ADDR);
670
671 if (__is_valid_data_blkaddr(blkaddr)) {
672 if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
673 goto next;
674 if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
675 DATA_GENERIC_ENHANCE))
676 goto next;
677 if (compressed_cluster)
678 valid_blocks++;
679 }
680
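/* merge contiguous block addresses so each run is invalidated with a single call */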
681 if (blkstart + blklen == blkaddr) {
682 blklen++;
683 } else {
684 f2fs_invalidate_blocks(sbi, blkstart, blklen);
685 blkstart = blkaddr;
686 blklen = 1;
687 }
688
689 if (!released || blkaddr != COMPRESS_ADDR)
690 nr_free++;
691
692 continue;
693
694 next:
695 if (blklen)
696 f2fs_invalidate_blocks(sbi, blkstart, blklen);
697
698 blkstart = le32_to_cpu(*(addr + 1));
699 blklen = 0;
700 }
701
702 if (blklen)
703 f2fs_invalidate_blocks(sbi, blkstart, blklen);
704
705 if (compressed_cluster)
706 f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false);
707
708 if (nr_free) {
709 pgoff_t fofs;
710 /*
711 * once we invalidate valid blkaddr in range [ofs, ofs + count],
712 * we will invalidate all blkaddr in the whole range.
713 */
714 fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio),
715 dn->inode) + ofs;
716 f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
717 f2fs_update_age_extent_cache_range(dn, fofs, len);
718 dec_valid_block_count(sbi, dn->inode, nr_free);
719 }
720 dn->ofs_in_node = ofs;
721
722 f2fs_update_time(sbi, REQ_TIME);
723 trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
724 dn->ofs_in_node, nr_free);
725 }
726
727 static int truncate_partial_data_page(struct inode *inode, u64 from,
728 bool cache_only)
729 {
730 loff_t offset = from & (PAGE_SIZE - 1);
731 pgoff_t index = from >> PAGE_SHIFT;
732 struct address_space *mapping = inode->i_mapping;
733 struct folio *folio;
734
735 if (!offset && !cache_only)
736 return 0;
737
738 if (cache_only) {
739 folio = filemap_lock_folio(mapping, index);
740 if (IS_ERR(folio))
741 return 0;
742 if (folio_test_uptodate(folio))
743 goto truncate_out;
744 f2fs_folio_put(folio, true);
745 return 0;
746 }
747
748 folio = f2fs_get_lock_data_folio(inode, index, true);
749 if (IS_ERR(folio))
750 return PTR_ERR(folio) == -ENOENT ? 0 : PTR_ERR(folio);
751 truncate_out:
752 f2fs_folio_wait_writeback(folio, DATA, true, true);
753 folio_zero_segment(folio, offset, folio_size(folio));
754
755 /* An encrypted inode should have a key and truncate the last page. */
756 f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode));
757 if (!cache_only)
758 folio_mark_dirty(folio);
759 f2fs_folio_put(folio, true);
760 return 0;
761 }
762
763 int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
764 {
765 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
766 struct dnode_of_data dn;
767 pgoff_t free_from;
768 int count = 0, err = 0;
769 struct folio *ifolio;
770 bool truncate_page = false;
771
772 trace_f2fs_truncate_blocks_enter(inode, from);
773
774 if (IS_DEVICE_ALIASING(inode) && from) {
775 err = -EINVAL;
776 goto out_err;
777 }
778
779 free_from = (pgoff_t)F2FS_BLK_ALIGN(from);
780
781 if (free_from >= max_file_blocks(inode))
782 goto free_partial;
783
784 if (lock)
785 f2fs_lock_op(sbi);
786
787 ifolio = f2fs_get_inode_folio(sbi, inode->i_ino);
788 if (IS_ERR(ifolio)) {
789 err = PTR_ERR(ifolio);
790 goto out;
791 }
792
793 if (IS_DEVICE_ALIASING(inode)) {
794 struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
795 struct extent_info ei = et->largest;
796
797 f2fs_invalidate_blocks(sbi, ei.blk, ei.len);
798
799 dec_valid_block_count(sbi, inode, ei.len);
800 f2fs_update_time(sbi, REQ_TIME);
801
802 f2fs_folio_put(ifolio, true);
803 goto out;
804 }
805
806 if (f2fs_has_inline_data(inode)) {
807 f2fs_truncate_inline_inode(inode, ifolio, from);
808 f2fs_folio_put(ifolio, true);
809 truncate_page = true;
810 goto out;
811 }
812
813 set_new_dnode(&dn, inode, ifolio, NULL, 0);
814 err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
815 if (err) {
816 if (err == -ENOENT)
817 goto free_next;
818 goto out;
819 }
820
821 count = ADDRS_PER_PAGE(dn.node_folio, inode);
822
823 count -= dn.ofs_in_node;
824 f2fs_bug_on(sbi, count < 0);
825
826 if (dn.ofs_in_node || IS_INODE(dn.node_folio)) {
827 f2fs_truncate_data_blocks_range(&dn, count);
828 free_from += count;
829 }
830
831 f2fs_put_dnode(&dn);
832 free_next:
833 err = f2fs_truncate_inode_blocks(inode, free_from);
834 out:
835 if (lock)
836 f2fs_unlock_op(sbi);
837 free_partial:
838 /* lastly zero out the first data page */
839 if (!err)
840 err = truncate_partial_data_page(inode, from, truncate_page);
841 out_err:
842 trace_f2fs_truncate_blocks_exit(inode, err);
843 return err;
844 }
845
846 int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
847 {
848 u64 free_from = from;
849 int err;
850
851 #ifdef CONFIG_F2FS_FS_COMPRESSION
852 /*
853 * for a compressed file, only cluster-size-aligned
854 * truncation is supported.
855 */
856 if (f2fs_compressed_file(inode))
857 free_from = round_up(from,
858 F2FS_I(inode)->i_cluster_size << PAGE_SHIFT);
859 #endif
860
861 err = f2fs_do_truncate_blocks(inode, free_from, lock);
862 if (err)
863 return err;
864
865 #ifdef CONFIG_F2FS_FS_COMPRESSION
866 /*
867 * For a compressed file, direct writes are disallowed after its compressed
868 * blocks are released, but should be allowed again after truncating to zero.
869 */
870 if (f2fs_compressed_file(inode) && !free_from
871 && is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
872 clear_inode_flag(inode, FI_COMPRESS_RELEASED);
873
874 if (from != free_from) {
875 err = f2fs_truncate_partial_cluster(inode, from, lock);
876 if (err)
877 return err;
878 }
879 #endif
880
881 return 0;
882 }
883
884 int f2fs_truncate(struct inode *inode)
885 {
886 int err;
887
888 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
889 return -EIO;
890
891 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
892 S_ISLNK(inode->i_mode)))
893 return 0;
894
895 trace_f2fs_truncate(inode);
896
897 if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE))
898 return -EIO;
899
900 err = f2fs_dquot_initialize(inode);
901 if (err)
902 return err;
903
904 /* we should check inline_data size */
905 if (!f2fs_may_inline_data(inode)) {
906 err = f2fs_convert_inline_inode(inode);
907 if (err)
908 return err;
909 }
910
911 err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
912 if (err)
913 return err;
914
915 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
916 f2fs_mark_inode_dirty_sync(inode, false);
917 return 0;
918 }
919
920 static bool f2fs_force_buffered_io(struct inode *inode, int rw)
921 {
922 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
923
924 if (!fscrypt_dio_supported(inode))
925 return true;
926 if (fsverity_active(inode))
927 return true;
928 if (f2fs_compressed_file(inode))
929 return true;
930 /*
931 * only force direct reads to use buffered IO; for direct writes,
932 * inline data conversion is expected before committing IO.
933 */
934 if (f2fs_has_inline_data(inode) && rw == READ)
935 return true;
936
937 /* disallow direct IO if any device has an unaligned blksize */
938 if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
939 return true;
940 /*
941 * for a blkzoned device, fall back from direct IO to buffered IO, so
942 * all IOs can be serialized by the log-structured write path.
943 */
944 if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) &&
945 !f2fs_is_pinned_file(inode))
946 return true;
947 if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
948 return true;
949
950 return false;
951 }
952
953 int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path,
954 struct kstat *stat, u32 request_mask, unsigned int query_flags)
955 {
956 struct inode *inode = d_inode(path->dentry);
957 struct f2fs_inode_info *fi = F2FS_I(inode);
958 struct f2fs_inode *ri = NULL;
959 unsigned int flags;
960
961 if (f2fs_has_extra_attr(inode) &&
962 f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
963 F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
964 stat->result_mask |= STATX_BTIME;
965 stat->btime.tv_sec = fi->i_crtime.tv_sec;
966 stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
967 }
968
969 /*
970 * Return the DIO alignment restrictions if requested. We only return
971 * this information when requested, since on encrypted files it might
972 * take a fair bit of work to get if the file wasn't opened recently.
973 *
974 * f2fs sometimes supports DIO reads but not DIO writes. STATX_DIOALIGN
975 * cannot represent that, so in that case we report no DIO support.
976 */
977 if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
978 unsigned int bsize = i_blocksize(inode);
979
980 stat->result_mask |= STATX_DIOALIGN;
981 if (!f2fs_force_buffered_io(inode, WRITE)) {
982 stat->dio_mem_align = bsize;
983 stat->dio_offset_align = bsize;
984 }
985 }
986
987 flags = fi->i_flags;
988 if (flags & F2FS_COMPR_FL)
989 stat->attributes |= STATX_ATTR_COMPRESSED;
990 if (flags & F2FS_APPEND_FL)
991 stat->attributes |= STATX_ATTR_APPEND;
992 if (IS_ENCRYPTED(inode))
993 stat->attributes |= STATX_ATTR_ENCRYPTED;
994 if (flags & F2FS_IMMUTABLE_FL)
995 stat->attributes |= STATX_ATTR_IMMUTABLE;
996 if (flags & F2FS_NODUMP_FL)
997 stat->attributes |= STATX_ATTR_NODUMP;
998 if (IS_VERITY(inode))
999 stat->attributes |= STATX_ATTR_VERITY;
1000
1001 stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
1002 STATX_ATTR_APPEND |
1003 STATX_ATTR_ENCRYPTED |
1004 STATX_ATTR_IMMUTABLE |
1005 STATX_ATTR_NODUMP |
1006 STATX_ATTR_VERITY);
1007
1008 generic_fillattr(idmap, request_mask, inode, stat);
1009
1010 /* we need to show initial sectors used for inline_data/dentries */
1011 if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
1012 f2fs_has_inline_dentry(inode))
1013 stat->blocks += (stat->size + 511) >> 9;
1014
1015 return 0;
1016 }
1017
1018 #ifdef CONFIG_F2FS_FS_POSIX_ACL
1019 static void __setattr_copy(struct mnt_idmap *idmap,
1020 struct inode *inode, const struct iattr *attr)
1021 {
1022 unsigned int ia_valid = attr->ia_valid;
1023
1024 i_uid_update(idmap, attr, inode);
1025 i_gid_update(idmap, attr, inode);
1026 if (ia_valid & ATTR_ATIME)
1027 inode_set_atime_to_ts(inode, attr->ia_atime);
1028 if (ia_valid & ATTR_MTIME)
1029 inode_set_mtime_to_ts(inode, attr->ia_mtime);
1030 if (ia_valid & ATTR_CTIME)
1031 inode_set_ctime_to_ts(inode, attr->ia_ctime);
1032 if (ia_valid & ATTR_MODE) {
1033 umode_t mode = attr->ia_mode;
1034
1035 if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
1036 mode &= ~S_ISGID;
1037 set_acl_inode(inode, mode);
1038 }
1039 }
1040 #else
1041 #define __setattr_copy setattr_copy
1042 #endif
1043
1044 int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
1045 struct iattr *attr)
1046 {
1047 struct inode *inode = d_inode(dentry);
1048 struct f2fs_inode_info *fi = F2FS_I(inode);
1049 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1050 int err;
1051
1052 if (unlikely(f2fs_cp_error(sbi)))
1053 return -EIO;
1054
1055 err = setattr_prepare(idmap, dentry, attr);
1056 if (err)
1057 return err;
1058
1059 err = fscrypt_prepare_setattr(dentry, attr);
1060 if (err)
1061 return err;
1062
1063 err = fsverity_prepare_setattr(dentry, attr);
1064 if (err)
1065 return err;
1066
1067 if (unlikely(IS_IMMUTABLE(inode)))
1068 return -EPERM;
1069
1070 if (unlikely(IS_APPEND(inode) &&
1071 (attr->ia_valid & (ATTR_MODE | ATTR_UID |
1072 ATTR_GID | ATTR_TIMES_SET))))
1073 return -EPERM;
1074
1075 if ((attr->ia_valid & ATTR_SIZE)) {
1076 if (!f2fs_is_compress_backend_ready(inode) ||
1077 IS_DEVICE_ALIASING(inode))
1078 return -EOPNOTSUPP;
1079 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) &&
1080 !IS_ALIGNED(attr->ia_size,
1081 F2FS_BLK_TO_BYTES(fi->i_cluster_size)))
1082 return -EINVAL;
1083 /*
1084 * To prevent scattered pin block generation, we don't allow
1085 * unaligned truncation to a smaller or equal size for a pinned file.
1086 * We only support overwrite IO to a pinned file, so we don't
1087 * care about truncation to a larger size.
1088 */
1089 if (f2fs_is_pinned_file(inode) &&
1090 attr->ia_size <= i_size_read(inode) &&
1091 !IS_ALIGNED(attr->ia_size,
1092 F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi))))
1093 return -EINVAL;
1094 }
1095
1096 if (is_quota_modification(idmap, inode, attr)) {
1097 err = f2fs_dquot_initialize(inode);
1098 if (err)
1099 return err;
1100 }
1101 if (i_uid_needs_update(idmap, attr, inode) ||
1102 i_gid_needs_update(idmap, attr, inode)) {
1103 f2fs_lock_op(sbi);
1104 err = dquot_transfer(idmap, inode, attr);
1105 if (err) {
1106 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
1107 f2fs_unlock_op(sbi);
1108 return err;
1109 }
1110 /*
1111 * update uid/gid under lock_op(), so that dquot and inode can
1112 * be updated atomically.
1113 */
1114 i_uid_update(idmap, attr, inode);
1115 i_gid_update(idmap, attr, inode);
1116 f2fs_mark_inode_dirty_sync(inode, true);
1117 f2fs_unlock_op(sbi);
1118 }
1119
1120 if (attr->ia_valid & ATTR_SIZE) {
1121 loff_t old_size = i_size_read(inode);
1122
1123 if (attr->ia_size > MAX_INLINE_DATA(inode)) {
1124 /*
1125 * should convert the inline inode before i_size_write so that an inode
1126 * with the inline flag never exceeds the inline_data size.
1127 */
1128 err = f2fs_convert_inline_inode(inode);
1129 if (err)
1130 return err;
1131 }
1132
1133 /*
1134 * wait for inflight dio, blocks should be removed after
1135 * IO completion.
1136 */
1137 if (attr->ia_size < old_size)
1138 inode_dio_wait(inode);
1139
1140 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
1141 filemap_invalidate_lock(inode->i_mapping);
1142
1143 if (attr->ia_size > old_size)
1144 f2fs_zero_post_eof_page(inode, attr->ia_size);
1145 truncate_setsize(inode, attr->ia_size);
1146
1147 if (attr->ia_size <= old_size)
1148 err = f2fs_truncate(inode);
1149 /*
1150 * do not trim all blocks after i_size if target size is
1151 * larger than i_size.
1152 */
1153 filemap_invalidate_unlock(inode->i_mapping);
1154 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
1155 if (err)
1156 return err;
1157
1158 spin_lock(&fi->i_size_lock);
1159 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
1160 fi->last_disk_size = i_size_read(inode);
1161 spin_unlock(&fi->i_size_lock);
1162 }
1163
1164 __setattr_copy(idmap, inode, attr);
1165
1166 if (attr->ia_valid & ATTR_MODE) {
1167 err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode));
1168
1169 if (is_inode_flag_set(inode, FI_ACL_MODE)) {
1170 if (!err)
1171 inode->i_mode = fi->i_acl_mode;
1172 clear_inode_flag(inode, FI_ACL_MODE);
1173 }
1174 }
1175
1176 /* file size may have changed here */
1177 f2fs_mark_inode_dirty_sync(inode, true);
1178
1179 /* inode change will produce dirty node pages flushed by checkpoint */
1180 f2fs_balance_fs(sbi, true);
1181
1182 return err;
1183 }
1184
1185 const struct inode_operations f2fs_file_inode_operations = {
1186 .getattr = f2fs_getattr,
1187 .setattr = f2fs_setattr,
1188 .get_inode_acl = f2fs_get_acl,
1189 .set_acl = f2fs_set_acl,
1190 .listxattr = f2fs_listxattr,
1191 .fiemap = f2fs_fiemap,
1192 .fileattr_get = f2fs_fileattr_get,
1193 .fileattr_set = f2fs_fileattr_set,
1194 };
1195
1196 static int fill_zero(struct inode *inode, pgoff_t index,
1197 loff_t start, loff_t len)
1198 {
1199 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1200 struct folio *folio;
1201
1202 if (!len)
1203 return 0;
1204
1205 f2fs_balance_fs(sbi, true);
1206
1207 f2fs_lock_op(sbi);
1208 folio = f2fs_get_new_data_folio(inode, NULL, index, false);
1209 f2fs_unlock_op(sbi);
1210
1211 if (IS_ERR(folio))
1212 return PTR_ERR(folio);
1213
1214 f2fs_folio_wait_writeback(folio, DATA, true, true);
1215 folio_zero_range(folio, start, len);
1216 folio_mark_dirty(folio);
1217 f2fs_folio_put(folio, true);
1218 return 0;
1219 }
1220
1221 int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
1222 {
1223 int err;
1224
1225 while (pg_start < pg_end) {
1226 struct dnode_of_data dn;
1227 pgoff_t end_offset, count;
1228
1229 set_new_dnode(&dn, inode, NULL, NULL, 0);
1230 err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
1231 if (err) {
1232 if (err == -ENOENT) {
1233 pg_start = f2fs_get_next_page_offset(&dn,
1234 pg_start);
1235 continue;
1236 }
1237 return err;
1238 }
1239
1240 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
1241 count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);
1242
1243 f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
1244
1245 f2fs_truncate_data_blocks_range(&dn, count);
1246 f2fs_put_dnode(&dn);
1247
1248 pg_start += count;
1249 }
1250 return 0;
1251 }
1252
1253 static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
1254 {
1255 pgoff_t pg_start, pg_end;
1256 loff_t off_start, off_end;
1257 int ret;
1258
1259 ret = f2fs_convert_inline_inode(inode);
1260 if (ret)
1261 return ret;
1262
1263 filemap_invalidate_lock(inode->i_mapping);
1264 f2fs_zero_post_eof_page(inode, offset + len);
1265 filemap_invalidate_unlock(inode->i_mapping);
1266
1267 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
1268 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
1269
1270 off_start = offset & (PAGE_SIZE - 1);
1271 off_end = (offset + len) & (PAGE_SIZE - 1);
1272
1273 if (pg_start == pg_end) {
1274 ret = fill_zero(inode, pg_start, off_start,
1275 off_end - off_start);
1276 if (ret)
1277 return ret;
1278 } else {
1279 if (off_start) {
1280 ret = fill_zero(inode, pg_start++, off_start,
1281 PAGE_SIZE - off_start);
1282 if (ret)
1283 return ret;
1284 }
1285 if (off_end) {
1286 ret = fill_zero(inode, pg_end, 0, off_end);
1287 if (ret)
1288 return ret;
1289 }
1290
1291 if (pg_start < pg_end) {
1292 loff_t blk_start, blk_end;
1293 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1294
1295 f2fs_balance_fs(sbi, true);
1296
1297 blk_start = (loff_t)pg_start << PAGE_SHIFT;
1298 blk_end = (loff_t)pg_end << PAGE_SHIFT;
1299
1300 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1301 filemap_invalidate_lock(inode->i_mapping);
1302
1303 truncate_pagecache_range(inode, blk_start, blk_end - 1);
1304
1305 f2fs_lock_op(sbi);
1306 ret = f2fs_truncate_hole(inode, pg_start, pg_end);
1307 f2fs_unlock_op(sbi);
1308
1309 filemap_invalidate_unlock(inode->i_mapping);
1310 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1311 }
1312 }
1313
1314 return ret;
1315 }
1316
1317 static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr,
1318 int *do_replace, pgoff_t off, pgoff_t len)
1319 {
1320 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1321 struct dnode_of_data dn;
1322 int ret, done, i;
1323
1324 next_dnode:
1325 set_new_dnode(&dn, inode, NULL, NULL, 0);
1326 ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
1327 if (ret && ret != -ENOENT) {
1328 return ret;
1329 } else if (ret == -ENOENT) {
1330 if (dn.max_level == 0)
1331 return -ENOENT;
1332 done = min((pgoff_t)ADDRS_PER_BLOCK(inode) -
1333 dn.ofs_in_node, len);
1334 blkaddr += done;
1335 do_replace += done;
1336 goto next;
1337 }
1338
1339 done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) -
1340 dn.ofs_in_node, len);
1341 for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
1342 *blkaddr = f2fs_data_blkaddr(&dn);
1343
1344 if (__is_valid_data_blkaddr(*blkaddr) &&
1345 !f2fs_is_valid_blkaddr(sbi, *blkaddr,
1346 DATA_GENERIC_ENHANCE)) {
1347 f2fs_put_dnode(&dn);
1348 return -EFSCORRUPTED;
1349 }
1350
1351 if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {
1352
1353 if (f2fs_lfs_mode(sbi)) {
1354 f2fs_put_dnode(&dn);
1355 return -EOPNOTSUPP;
1356 }
1357
1358 /* do not invalidate this block address */
1359 f2fs_update_data_blkaddr(&dn, NULL_ADDR);
1360 *do_replace = 1;
1361 }
1362 }
1363 f2fs_put_dnode(&dn);
1364 next:
1365 len -= done;
1366 off += done;
1367 if (len)
1368 goto next_dnode;
1369 return 0;
1370 }
1371
1372 static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr,
1373 int *do_replace, pgoff_t off, int len)
1374 {
1375 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1376 struct dnode_of_data dn;
1377 int ret, i;
1378
1379 for (i = 0; i < len; i++, do_replace++, blkaddr++) {
1380 if (*do_replace == 0)
1381 continue;
1382
1383 set_new_dnode(&dn, inode, NULL, NULL, 0);
1384 ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
1385 if (ret) {
1386 dec_valid_block_count(sbi, inode, 1);
1387 f2fs_invalidate_blocks(sbi, *blkaddr, 1);
1388 } else {
1389 f2fs_update_data_blkaddr(&dn, *blkaddr);
1390 }
1391 f2fs_put_dnode(&dn);
1392 }
1393 return 0;
1394 }
1395
1396 static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
1397 block_t *blkaddr, int *do_replace,
1398 pgoff_t src, pgoff_t dst, pgoff_t len, bool full)
1399 {
1400 struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode);
1401 pgoff_t i = 0;
1402 int ret;
1403
1404 while (i < len) {
1405 if (blkaddr[i] == NULL_ADDR && !full) {
1406 i++;
1407 continue;
1408 }
1409
1410 if (do_replace[i] || blkaddr[i] == NULL_ADDR) {
1411 struct dnode_of_data dn;
1412 struct node_info ni;
1413 size_t new_size;
1414 pgoff_t ilen;
1415
1416 set_new_dnode(&dn, dst_inode, NULL, NULL, 0);
1417 ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
1418 if (ret)
1419 return ret;
1420
1421 ret = f2fs_get_node_info(sbi, dn.nid, &ni, false);
1422 if (ret) {
1423 f2fs_put_dnode(&dn);
1424 return ret;
1425 }
1426
1427 ilen = min((pgoff_t)
1428 ADDRS_PER_PAGE(dn.node_folio, dst_inode) -
1429 dn.ofs_in_node, len - i);
1430 do {
1431 dn.data_blkaddr = f2fs_data_blkaddr(&dn);
1432 f2fs_truncate_data_blocks_range(&dn, 1);
1433
1434 if (do_replace[i]) {
1435 f2fs_i_blocks_write(src_inode,
1436 1, false, false);
1437 f2fs_i_blocks_write(dst_inode,
1438 1, true, false);
1439 f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
1440 blkaddr[i], ni.version, true, false);
1441
1442 do_replace[i] = 0;
1443 }
1444 dn.ofs_in_node++;
1445 i++;
1446 new_size = (loff_t)(dst + i) << PAGE_SHIFT;
1447 if (dst_inode->i_size < new_size)
1448 f2fs_i_size_write(dst_inode, new_size);
1449 } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));
1450
1451 f2fs_put_dnode(&dn);
1452 } else {
1453 struct folio *fsrc, *fdst;
1454
1455 fsrc = f2fs_get_lock_data_folio(src_inode,
1456 src + i, true);
1457 if (IS_ERR(fsrc))
1458 return PTR_ERR(fsrc);
1459 fdst = f2fs_get_new_data_folio(dst_inode, NULL, dst + i,
1460 true);
1461 if (IS_ERR(fdst)) {
1462 f2fs_folio_put(fsrc, true);
1463 return PTR_ERR(fdst);
1464 }
1465
1466 f2fs_folio_wait_writeback(fdst, DATA, true, true);
1467
1468 memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE);
1469 folio_mark_dirty(fdst);
1470 folio_set_f2fs_gcing(fdst);
1471 f2fs_folio_put(fdst, true);
1472 f2fs_folio_put(fsrc, true);
1473
1474 ret = f2fs_truncate_hole(src_inode,
1475 src + i, src + i + 1);
1476 if (ret)
1477 return ret;
1478 i++;
1479 }
1480 }
1481 return 0;
1482 }
1483
1484 static int __exchange_data_block(struct inode *src_inode,
1485 struct inode *dst_inode, pgoff_t src, pgoff_t dst,
1486 pgoff_t len, bool full)
1487 {
1488 block_t *src_blkaddr;
1489 int *do_replace;
1490 pgoff_t olen;
1491 int ret;
1492
1493 while (len) {
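/* process at most four dnode blocks' worth of addresses per iteration */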
1494 olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len);
1495
1496 src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
1497 array_size(olen, sizeof(block_t)),
1498 GFP_NOFS);
1499 if (!src_blkaddr)
1500 return -ENOMEM;
1501
1502 do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode),
1503 array_size(olen, sizeof(int)),
1504 GFP_NOFS);
1505 if (!do_replace) {
1506 kvfree(src_blkaddr);
1507 return -ENOMEM;
1508 }
1509
1510 ret = __read_out_blkaddrs(src_inode, src_blkaddr,
1511 do_replace, src, olen);
1512 if (ret)
1513 goto roll_back;
1514
1515 ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr,
1516 do_replace, src, dst, olen, full);
1517 if (ret)
1518 goto roll_back;
1519
1520 src += olen;
1521 dst += olen;
1522 len -= olen;
1523
1524 kvfree(src_blkaddr);
1525 kvfree(do_replace);
1526 }
1527 return 0;
1528
1529 roll_back:
1530 __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen);
1531 kvfree(src_blkaddr);
1532 kvfree(do_replace);
1533 return ret;
1534 }
1535
1536 static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
1537 {
1538 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1539 pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
1540 pgoff_t start = offset >> PAGE_SHIFT;
1541 pgoff_t end = (offset + len) >> PAGE_SHIFT;
1542 int ret;
1543
1544 f2fs_balance_fs(sbi, true);
1545
1546 /* avoid gc operation during block exchange */
1547 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1548 filemap_invalidate_lock(inode->i_mapping);
1549
1550 f2fs_zero_post_eof_page(inode, offset + len);
1551
1552 f2fs_lock_op(sbi);
1553 f2fs_drop_extent_tree(inode);
1554 truncate_pagecache(inode, offset);
1555 ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
1556 f2fs_unlock_op(sbi);
1557
1558 filemap_invalidate_unlock(inode->i_mapping);
1559 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1560 return ret;
1561 }
1562
1563 static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
1564 {
1565 loff_t new_size;
1566 int ret;
1567
1568 if (offset + len >= i_size_read(inode))
1569 return -EINVAL;
1570
1571 /* collapse range should be aligned to block size of f2fs. */
1572 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
1573 return -EINVAL;
1574
1575 ret = f2fs_convert_inline_inode(inode);
1576 if (ret)
1577 return ret;
1578
1579 /* write out all dirty pages from offset */
1580 ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
1581 if (ret)
1582 return ret;
1583
1584 ret = f2fs_do_collapse(inode, offset, len);
1585 if (ret)
1586 return ret;
1587
1588 /* write out all moved pages, if possible */
1589 filemap_invalidate_lock(inode->i_mapping);
1590 filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
1591 truncate_pagecache(inode, offset);
1592
1593 new_size = i_size_read(inode) - len;
1594 ret = f2fs_truncate_blocks(inode, new_size, true);
1595 filemap_invalidate_unlock(inode->i_mapping);
1596 if (!ret)
1597 f2fs_i_size_write(inode, new_size);
1598 return ret;
1599 }
1600
1601 static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
1602 pgoff_t end)
1603 {
1604 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1605 pgoff_t index = start;
1606 unsigned int ofs_in_node = dn->ofs_in_node;
1607 blkcnt_t count = 0;
1608 int ret;
1609
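/* count the holes (NULL_ADDR slots) that need new block reservations */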
1610 for (; index < end; index++, dn->ofs_in_node++) {
1611 if (f2fs_data_blkaddr(dn) == NULL_ADDR)
1612 count++;
1613 }
1614
1615 dn->ofs_in_node = ofs_in_node;
1616 ret = f2fs_reserve_new_blocks(dn, count);
1617 if (ret)
1618 return ret;
1619
1620 dn->ofs_in_node = ofs_in_node;
1621 for (index = start; index < end; index++, dn->ofs_in_node++) {
1622 dn->data_blkaddr = f2fs_data_blkaddr(dn);
1623 /*
1624 * f2fs_reserve_new_blocks() does not guarantee that every block
1625 * gets allocated.
1626 */
1627 if (dn->data_blkaddr == NULL_ADDR) {
1628 ret = -ENOSPC;
1629 break;
1630 }
1631
1632 if (dn->data_blkaddr == NEW_ADDR)
1633 continue;
1634
1635 if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr,
1636 DATA_GENERIC_ENHANCE)) {
1637 ret = -EFSCORRUPTED;
1638 break;
1639 }
1640
1641 f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1);
1642 f2fs_set_data_blkaddr(dn, NEW_ADDR);
1643 }
1644
1645 f2fs_update_read_extent_cache_range(dn, start, 0, index - start);
1646 f2fs_update_age_extent_cache_range(dn, start, index - start);
1647
1648 return ret;
1649 }
1650
1651 static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
1652 int mode)
1653 {
1654 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1655 struct address_space *mapping = inode->i_mapping;
1656 pgoff_t index, pg_start, pg_end;
1657 loff_t new_size = i_size_read(inode);
1658 loff_t off_start, off_end;
1659 int ret = 0;
1660
1661 ret = inode_newsize_ok(inode, (len + offset));
1662 if (ret)
1663 return ret;
1664
1665 ret = f2fs_convert_inline_inode(inode);
1666 if (ret)
1667 return ret;
1668
1669 ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
1670 if (ret)
1671 return ret;
1672
1673 filemap_invalidate_lock(mapping);
1674 f2fs_zero_post_eof_page(inode, offset + len);
1675 filemap_invalidate_unlock(mapping);
1676
1677 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
1678 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
1679
1680 off_start = offset & (PAGE_SIZE - 1);
1681 off_end = (offset + len) & (PAGE_SIZE - 1);
1682
1683 if (pg_start == pg_end) {
1684 ret = fill_zero(inode, pg_start, off_start,
1685 off_end - off_start);
1686 if (ret)
1687 return ret;
1688
1689 new_size = max_t(loff_t, new_size, offset + len);
1690 } else {
1691 if (off_start) {
1692 ret = fill_zero(inode, pg_start++, off_start,
1693 PAGE_SIZE - off_start);
1694 if (ret)
1695 return ret;
1696
1697 new_size = max_t(loff_t, new_size,
1698 (loff_t)pg_start << PAGE_SHIFT);
1699 }
1700
1701 for (index = pg_start; index < pg_end;) {
1702 struct dnode_of_data dn;
1703 unsigned int end_offset;
1704 pgoff_t end;
1705
1706 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1707 filemap_invalidate_lock(mapping);
1708
1709 truncate_pagecache_range(inode,
1710 (loff_t)index << PAGE_SHIFT,
1711 ((loff_t)pg_end << PAGE_SHIFT) - 1);
1712
1713 f2fs_lock_op(sbi);
1714
1715 set_new_dnode(&dn, inode, NULL, NULL, 0);
1716 ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
1717 if (ret) {
1718 f2fs_unlock_op(sbi);
1719 filemap_invalidate_unlock(mapping);
1720 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1721 goto out;
1722 }
1723
1724 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
1725 end = min(pg_end, end_offset - dn.ofs_in_node + index);
1726
1727 ret = f2fs_do_zero_range(&dn, index, end);
1728 f2fs_put_dnode(&dn);
1729
1730 f2fs_unlock_op(sbi);
1731 filemap_invalidate_unlock(mapping);
1732 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1733
1734 f2fs_balance_fs(sbi, dn.node_changed);
1735
1736 if (ret)
1737 goto out;
1738
1739 index = end;
1740 new_size = max_t(loff_t, new_size,
1741 (loff_t)index << PAGE_SHIFT);
1742 }
1743
1744 if (off_end) {
1745 ret = fill_zero(inode, pg_end, 0, off_end);
1746 if (ret)
1747 goto out;
1748
1749 new_size = max_t(loff_t, new_size, offset + len);
1750 }
1751 }
1752
1753 out:
1754 if (new_size > i_size_read(inode)) {
1755 if (mode & FALLOC_FL_KEEP_SIZE)
1756 file_set_keep_isize(inode);
1757 else
1758 f2fs_i_size_write(inode, new_size);
1759 }
1760 return ret;
1761 }
1762
1763 static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
1764 {
1765 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1766 struct address_space *mapping = inode->i_mapping;
1767 pgoff_t nr, pg_start, pg_end, delta, idx;
1768 loff_t new_size;
1769 int ret = 0;
1770
1771 new_size = i_size_read(inode) + len;
1772 ret = inode_newsize_ok(inode, new_size);
1773 if (ret)
1774 return ret;
1775
1776 if (offset >= i_size_read(inode))
1777 return -EINVAL;
1778
1779 /* insert range should be aligned to block size of f2fs. */
1780 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
1781 return -EINVAL;
1782
1783 ret = f2fs_convert_inline_inode(inode);
1784 if (ret)
1785 return ret;
1786
1787 f2fs_balance_fs(sbi, true);
1788
1789 filemap_invalidate_lock(mapping);
1790 ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
1791 filemap_invalidate_unlock(mapping);
1792 if (ret)
1793 return ret;
1794
1795 /* write out all dirty pages from offset */
1796 ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
1797 if (ret)
1798 return ret;
1799
1800 pg_start = offset >> PAGE_SHIFT;
1801 pg_end = (offset + len) >> PAGE_SHIFT;
1802 delta = pg_end - pg_start;
1803 idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
1804
1805 /* avoid gc operation during block exchange */
1806 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1807 filemap_invalidate_lock(mapping);
1808
1809 f2fs_zero_post_eof_page(inode, offset + len);
1810 truncate_pagecache(inode, offset);
1811
1812 while (!ret && idx > pg_start) {
1813 nr = idx - pg_start;
1814 if (nr > delta)
1815 nr = delta;
1816 idx -= nr;
1817
1818 f2fs_lock_op(sbi);
1819 f2fs_drop_extent_tree(inode);
1820
1821 ret = __exchange_data_block(inode, inode, idx,
1822 idx + delta, nr, false);
1823 f2fs_unlock_op(sbi);
1824 }
1825 filemap_invalidate_unlock(mapping);
1826 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1827 if (ret)
1828 return ret;
1829
1830 /* write out all moved pages, if possible */
1831 filemap_invalidate_lock(mapping);
1832 ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
1833 truncate_pagecache(inode, offset);
1834 filemap_invalidate_unlock(mapping);
1835
1836 if (!ret)
1837 f2fs_i_size_write(inode, new_size);
1838 return ret;
1839 }
1840
1841 static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
1842 loff_t len, int mode)
1843 {
1844 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1845 struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
1846 .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
1847 .m_may_create = true };
1848 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
1849 .init_gc_type = FG_GC,
1850 .should_migrate_blocks = false,
1851 .err_gc_skipped = true,
1852 .nr_free_secs = 0 };
1853 pgoff_t pg_start, pg_end;
1854 loff_t new_size;
1855 loff_t off_end;
1856 block_t expanded = 0;
1857 int err;
1858
1859 err = inode_newsize_ok(inode, (len + offset));
1860 if (err)
1861 return err;
1862
1863 err = f2fs_convert_inline_inode(inode);
1864 if (err)
1865 return err;
1866
1867 filemap_invalidate_lock(inode->i_mapping);
1868 f2fs_zero_post_eof_page(inode, offset + len);
1869 filemap_invalidate_unlock(inode->i_mapping);
1870
1871 f2fs_balance_fs(sbi, true);
1872
1873 pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
1874 pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
1875 off_end = (offset + len) & (PAGE_SIZE - 1);
1876
1877 map.m_lblk = pg_start;
1878 map.m_len = pg_end - pg_start;
1879 if (off_end)
1880 map.m_len++;
1881
1882 if (!map.m_len)
1883 return 0;
1884
1885 if (f2fs_is_pinned_file(inode)) {
1886 block_t sec_blks = CAP_BLKS_PER_SEC(sbi);
1887 block_t sec_len = roundup(map.m_len, sec_blks);
1888
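/* allocate the pinned space one section at a time */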
1889 map.m_len = sec_blks;
1890 next_alloc:
1891 f2fs_down_write(&sbi->pin_sem);
1892
1893 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
1894 if (has_not_enough_free_secs(sbi, 0, 0)) {
1895 f2fs_up_write(&sbi->pin_sem);
1896 err = -ENOSPC;
1897 f2fs_warn_ratelimited(sbi,
1898 "ino:%lu, start:%lu, end:%lu, need to trigger GC to "
1899 "reclaim enough free segment when checkpoint is enabled",
1900 inode->i_ino, pg_start, pg_end);
1901 goto out_err;
1902 }
1903 }
1904
1905 if (has_not_enough_free_secs(sbi, 0,
1906 sbi->reserved_pin_section)) {
1907 f2fs_down_write(&sbi->gc_lock);
1908 stat_inc_gc_call_count(sbi, FOREGROUND);
1909 err = f2fs_gc(sbi, &gc_control);
1910 if (err && err != -ENODATA) {
1911 f2fs_up_write(&sbi->pin_sem);
1912 goto out_err;
1913 }
1914 }
1915
1916 err = f2fs_allocate_pinning_section(sbi);
1917 if (err) {
1918 f2fs_up_write(&sbi->pin_sem);
1919 goto out_err;
1920 }
1921
1922 map.m_seg_type = CURSEG_COLD_DATA_PINNED;
1923 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
1924 file_dont_truncate(inode);
1925
1926 f2fs_up_write(&sbi->pin_sem);
1927
1928 expanded += map.m_len;
1929 sec_len -= map.m_len;
1930 map.m_lblk += map.m_len;
1931 if (!err && sec_len)
1932 goto next_alloc;
1933
1934 map.m_len = expanded;
1935 } else {
1936 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO);
1937 expanded = map.m_len;
1938 }
1939 out_err:
1940 if (err) {
1941 pgoff_t last_off;
1942
1943 if (!expanded)
1944 return err;
1945
1946 last_off = pg_start + expanded - 1;
1947
1948 /* update new size to the failed position */
1949 new_size = (last_off == pg_end) ? offset + len :
1950 (loff_t)(last_off + 1) << PAGE_SHIFT;
1951 } else {
1952 new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
1953 }
1954
1955 if (new_size > i_size_read(inode)) {
1956 if (mode & FALLOC_FL_KEEP_SIZE)
1957 file_set_keep_isize(inode);
1958 else
1959 f2fs_i_size_write(inode, new_size);
1960 }
1961
1962 return err;
1963 }
1964
1965 static long f2fs_fallocate(struct file *file, int mode,
1966 loff_t offset, loff_t len)
1967 {
1968 struct inode *inode = file_inode(file);
1969 long ret = 0;
1970
1971 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
1972 return -EIO;
1973 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
1974 return -ENOSPC;
1975 if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode))
1976 return -EOPNOTSUPP;
1977
1978 /* f2fs only supports ->fallocate for regular files */
1979 if (!S_ISREG(inode->i_mode))
1980 return -EINVAL;
1981
1982 if (IS_ENCRYPTED(inode) &&
1983 (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
1984 return -EOPNOTSUPP;
1985
1986 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
1987 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
1988 FALLOC_FL_INSERT_RANGE))
1989 return -EOPNOTSUPP;
1990
1991 inode_lock(inode);
1992
1993 /*
1994 * Pinned files should not support partial truncation since their
1995 * blocks can be in use by applications.
1996 */
1997 if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) &&
1998 (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
1999 FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) {
2000 ret = -EOPNOTSUPP;
2001 goto out;
2002 }
2003
2004 ret = file_modified(file);
2005 if (ret)
2006 goto out;
2007
2008 /*
2009 * wait for in-flight DIO; blocks should be removed only after I/O
2010 * completion.
2011 */
2012 inode_dio_wait(inode);
2013
2014 if (mode & FALLOC_FL_PUNCH_HOLE) {
2015 if (offset >= inode->i_size)
2016 goto out;
2017
2018 ret = f2fs_punch_hole(inode, offset, len);
2019 } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
2020 ret = f2fs_collapse_range(inode, offset, len);
2021 } else if (mode & FALLOC_FL_ZERO_RANGE) {
2022 ret = f2fs_zero_range(inode, offset, len, mode);
2023 } else if (mode & FALLOC_FL_INSERT_RANGE) {
2024 ret = f2fs_insert_range(inode, offset, len);
2025 } else {
2026 ret = f2fs_expand_inode_data(inode, offset, len, mode);
2027 }
2028
2029 if (!ret) {
2030 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
2031 f2fs_mark_inode_dirty_sync(inode, false);
2032 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2033 }
2034
2035 out:
2036 inode_unlock(inode);
2037
2038 trace_f2fs_fallocate(inode, mode, offset, len, ret);
2039 return ret;
2040 }
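
/*
 * Illustrative sketch only (not part of this file): how the fallocate()
 * modes dispatched above are reached from userspace. The fd and offsets
 * are placeholders; errors are ignored for brevity.
 *
 *	// punch out one block while keeping i_size unchanged
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 *		  4096, 4096);
 *
 *	// preallocate 1MB beyond EOF without exposing it in i_size
 *	fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1024 * 1024);
 */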
2041
2042 static int f2fs_release_file(struct inode *inode, struct file *filp)
2043 {
2044 if (atomic_dec_and_test(&F2FS_I(inode)->open_count))
2045 f2fs_remove_donate_inode(inode);
2046
2047 /*
2048 * f2fs_release_file is called on every close. So we should not drop
2049 * any in-memory pages on a close issued by another process.
2050 */
2051 if (!(filp->f_mode & FMODE_WRITE) ||
2052 atomic_read(&inode->i_writecount) != 1)
2053 return 0;
2054
2055 inode_lock(inode);
2056 f2fs_abort_atomic_write(inode, true);
2057 inode_unlock(inode);
2058
2059 return 0;
2060 }
2061
2062 static int f2fs_file_flush(struct file *file, fl_owner_t id)
2063 {
2064 struct inode *inode = file_inode(file);
2065
2066 /*
2067 * If the process doing a transaction crashes, we should roll it
2068 * back. Otherwise, other readers/writers can see a corrupted database
2069 * until all the writers close their files. Since this should be done
2070 * before dropping the file lock, it needs to be done in ->flush.
2071 */
2072 if (F2FS_I(inode)->atomic_write_task == current &&
2073 (current->flags & PF_EXITING)) {
2074 inode_lock(inode);
2075 f2fs_abort_atomic_write(inode, true);
2076 inode_unlock(inode);
2077 }
2078
2079 return 0;
2080 }
2081
2082 static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
2083 {
2084 struct f2fs_inode_info *fi = F2FS_I(inode);
2085 u32 masked_flags = fi->i_flags & mask;
2086
2087 /* mask can be shrunk by flags_valid selector */
2088 iflags &= mask;
2089
2090 /* Is it quota file? Do not allow user to mess with it */
2091 if (IS_NOQUOTA(inode))
2092 return -EPERM;
2093
2094 if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) {
2095 if (!f2fs_sb_has_casefold(F2FS_I_SB(inode)))
2096 return -EOPNOTSUPP;
2097 if (!f2fs_empty_dir(inode))
2098 return -ENOTEMPTY;
2099 }
2100
2101 if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) {
2102 if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
2103 return -EOPNOTSUPP;
2104 if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL))
2105 return -EINVAL;
2106 }
2107
2108 if ((iflags ^ masked_flags) & F2FS_COMPR_FL) {
2109 if (masked_flags & F2FS_COMPR_FL) {
2110 if (!f2fs_disable_compressed_file(inode))
2111 return -EINVAL;
2112 } else {
2113 /* try to convert inline_data to support compression */
2114 int err = f2fs_convert_inline_inode(inode);
2115 if (err)
2116 return err;
2117
2118 f2fs_down_write(&fi->i_sem);
2119 if (!f2fs_may_compress(inode) ||
2120 (S_ISREG(inode->i_mode) &&
2121 F2FS_HAS_BLOCKS(inode))) {
2122 f2fs_up_write(&fi->i_sem);
2123 return -EINVAL;
2124 }
2125 err = set_compress_context(inode);
2126 f2fs_up_write(&fi->i_sem);
2127
2128 if (err)
2129 return err;
2130 }
2131 }
2132
2133 fi->i_flags = iflags | (fi->i_flags & ~mask);
2134 f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) &&
2135 (fi->i_flags & F2FS_NOCOMP_FL));
2136
2137 if (fi->i_flags & F2FS_PROJINHERIT_FL)
2138 set_inode_flag(inode, FI_PROJ_INHERIT);
2139 else
2140 clear_inode_flag(inode, FI_PROJ_INHERIT);
2141
2142 inode_set_ctime_current(inode);
2143 f2fs_set_inode_flags(inode);
2144 f2fs_mark_inode_dirty_sync(inode, true);
2145 return 0;
2146 }
2147
2148 /* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */
2149
2150 /*
2151 * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry
2152 * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to
2153 * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add
2154 * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL.
2155 *
2156 * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and
2157 * FS_IOC_FSSETXATTR is done by the VFS.
2158 */
2159
2160 static const struct {
2161 u32 iflag;
2162 u32 fsflag;
2163 } f2fs_fsflags_map[] = {
2164 { F2FS_COMPR_FL, FS_COMPR_FL },
2165 { F2FS_SYNC_FL, FS_SYNC_FL },
2166 { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL },
2167 { F2FS_APPEND_FL, FS_APPEND_FL },
2168 { F2FS_NODUMP_FL, FS_NODUMP_FL },
2169 { F2FS_NOATIME_FL, FS_NOATIME_FL },
2170 { F2FS_NOCOMP_FL, FS_NOCOMP_FL },
2171 { F2FS_INDEX_FL, FS_INDEX_FL },
2172 { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL },
2173 { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL },
2174 { F2FS_CASEFOLD_FL, FS_CASEFOLD_FL },
2175 };
2176
2177 #define F2FS_GETTABLE_FS_FL ( \
2178 FS_COMPR_FL | \
2179 FS_SYNC_FL | \
2180 FS_IMMUTABLE_FL | \
2181 FS_APPEND_FL | \
2182 FS_NODUMP_FL | \
2183 FS_NOATIME_FL | \
2184 FS_NOCOMP_FL | \
2185 FS_INDEX_FL | \
2186 FS_DIRSYNC_FL | \
2187 FS_PROJINHERIT_FL | \
2188 FS_ENCRYPT_FL | \
2189 FS_INLINE_DATA_FL | \
2190 FS_NOCOW_FL | \
2191 FS_VERITY_FL | \
2192 FS_CASEFOLD_FL)
2193
2194 #define F2FS_SETTABLE_FS_FL ( \
2195 FS_COMPR_FL | \
2196 FS_SYNC_FL | \
2197 FS_IMMUTABLE_FL | \
2198 FS_APPEND_FL | \
2199 FS_NODUMP_FL | \
2200 FS_NOATIME_FL | \
2201 FS_NOCOMP_FL | \
2202 FS_DIRSYNC_FL | \
2203 FS_PROJINHERIT_FL | \
2204 FS_CASEFOLD_FL)
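
/*
 * Illustrative sketch only: wiring up a hypothetical new on-disk flag
 * F2FS_EXAMPLE_FL, following the comment above. Both names below are
 * made up purely to show the pattern.
 *
 *	{ F2FS_EXAMPLE_FL, FS_EXAMPLE_FL },	// new f2fs_fsflags_map[] entry
 *	FS_EXAMPLE_FL | ...			// add to F2FS_GETTABLE_FS_FL
 *	FS_EXAMPLE_FL | ...			// and, if settable, to F2FS_SETTABLE_FS_FL
 */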
2205
2206 /* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */
2207 static inline u32 f2fs_iflags_to_fsflags(u32 iflags)
2208 {
2209 u32 fsflags = 0;
2210 int i;
2211
2212 for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
2213 if (iflags & f2fs_fsflags_map[i].iflag)
2214 fsflags |= f2fs_fsflags_map[i].fsflag;
2215
2216 return fsflags;
2217 }
2218
2219 /* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */
2220 static inline u32 f2fs_fsflags_to_iflags(u32 fsflags)
2221 {
2222 u32 iflags = 0;
2223 int i;
2224
2225 for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
2226 if (fsflags & f2fs_fsflags_map[i].fsflag)
2227 iflags |= f2fs_fsflags_map[i].iflag;
2228
2229 return iflags;
2230 }
2231
2232 static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
2233 {
2234 struct inode *inode = file_inode(filp);
2235
2236 return put_user(inode->i_generation, (int __user *)arg);
2237 }
2238
2239 static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
2240 {
2241 struct inode *inode = file_inode(filp);
2242 struct mnt_idmap *idmap = file_mnt_idmap(filp);
2243 struct f2fs_inode_info *fi = F2FS_I(inode);
2244 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2245 loff_t isize;
2246 int ret;
2247
2248 if (!(filp->f_mode & FMODE_WRITE))
2249 return -EBADF;
2250
2251 if (!inode_owner_or_capable(idmap, inode))
2252 return -EACCES;
2253
2254 if (!S_ISREG(inode->i_mode))
2255 return -EINVAL;
2256
2257 if (filp->f_flags & O_DIRECT)
2258 return -EINVAL;
2259
2260 ret = mnt_want_write_file(filp);
2261 if (ret)
2262 return ret;
2263
2264 inode_lock(inode);
2265
2266 if (!f2fs_disable_compressed_file(inode) ||
2267 f2fs_is_pinned_file(inode)) {
2268 ret = -EINVAL;
2269 goto out;
2270 }
2271
2272 if (f2fs_is_atomic_file(inode))
2273 goto out;
2274
2275 ret = f2fs_convert_inline_inode(inode);
2276 if (ret)
2277 goto out;
2278
2279 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
2280 f2fs_down_write(&fi->i_gc_rwsem[READ]);
2281
2282 /*
2283 * Should wait for end_io so that F2FS_WB_CP_DATA is counted correctly
2284 * by f2fs_is_atomic_file.
2285 */
2286 if (get_dirty_pages(inode))
2287 f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u",
2288 inode->i_ino, get_dirty_pages(inode));
2289 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
2290 if (ret)
2291 goto out_unlock;
2292
2293 /* Check if the inode already has a COW inode */
2294 if (fi->cow_inode == NULL) {
2295 /* Create a COW inode for atomic write */
2296 struct dentry *dentry = file_dentry(filp);
2297 struct inode *dir = d_inode(dentry->d_parent);
2298
2299 ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode);
2300 if (ret)
2301 goto out_unlock;
2302
2303 set_inode_flag(fi->cow_inode, FI_COW_FILE);
2304 clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
2305
2306 /* Set the COW inode's atomic_inode to the atomic inode */
2307 F2FS_I(fi->cow_inode)->atomic_inode = inode;
2308 } else {
2309 /* Reuse the already created COW inode */
2310 f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode));
2311
2312 invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1);
2313
2314 ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true);
2315 if (ret)
2316 goto out_unlock;
2317 }
2318
2319 f2fs_write_inode(inode, NULL);
2320
2321 stat_inc_atomic_inode(inode);
2322
2323 set_inode_flag(inode, FI_ATOMIC_FILE);
2324
2325 isize = i_size_read(inode);
2326 fi->original_i_size = isize;
2327 if (truncate) {
2328 set_inode_flag(inode, FI_ATOMIC_REPLACE);
2329 truncate_inode_pages_final(inode->i_mapping);
2330 f2fs_i_size_write(inode, 0);
2331 isize = 0;
2332 }
2333 f2fs_i_size_write(fi->cow_inode, isize);
2334
2335 out_unlock:
2336 f2fs_up_write(&fi->i_gc_rwsem[READ]);
2337 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
2338 if (ret)
2339 goto out;
2340
2341 f2fs_update_time(sbi, REQ_TIME);
2342 fi->atomic_write_task = current;
2343 stat_update_max_atomic_write(inode);
2344 fi->atomic_write_cnt = 0;
2345 out:
2346 inode_unlock(inode);
2347 mnt_drop_write_file(filp);
2348 return ret;
2349 }
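
/*
 * Illustrative sketch only (not part of this file): the userspace
 * sequence implemented by the atomic-write ioctls above and below.
 * Path, fd and buffer names are placeholders.
 *
 *	int fd = open("/data/db", O_RDWR);
 *
 *	ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE);
 *	pwrite(fd, buf, len, off);		// staged via the COW inode
 *	ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE);
 *	// or ioctl(fd, F2FS_IOC_ABORT_ATOMIC_WRITE) to roll back
 */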
2350
2351 static int f2fs_ioc_commit_atomic_write(struct file *filp)
2352 {
2353 struct inode *inode = file_inode(filp);
2354 struct mnt_idmap *idmap = file_mnt_idmap(filp);
2355 int ret;
2356
2357 if (!(filp->f_mode & FMODE_WRITE))
2358 return -EBADF;
2359
2360 if (!inode_owner_or_capable(idmap, inode))
2361 return -EACCES;
2362
2363 ret = mnt_want_write_file(filp);
2364 if (ret)
2365 return ret;
2366
2367 f2fs_balance_fs(F2FS_I_SB(inode), true);
2368
2369 inode_lock(inode);
2370
2371 if (f2fs_is_atomic_file(inode)) {
2372 ret = f2fs_commit_atomic_write(inode);
2373 if (!ret)
2374 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
2375
2376 f2fs_abort_atomic_write(inode, ret);
2377 } else {
2378 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
2379 }
2380
2381 inode_unlock(inode);
2382 mnt_drop_write_file(filp);
2383 return ret;
2384 }
2385
2386 static int f2fs_ioc_abort_atomic_write(struct file *filp)
2387 {
2388 struct inode *inode = file_inode(filp);
2389 struct mnt_idmap *idmap = file_mnt_idmap(filp);
2390 int ret;
2391
2392 if (!(filp->f_mode & FMODE_WRITE))
2393 return -EBADF;
2394
2395 if (!inode_owner_or_capable(idmap, inode))
2396 return -EACCES;
2397
2398 ret = mnt_want_write_file(filp);
2399 if (ret)
2400 return ret;
2401
2402 inode_lock(inode);
2403
2404 f2fs_abort_atomic_write(inode, true);
2405
2406 inode_unlock(inode);
2407
2408 mnt_drop_write_file(filp);
2409 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2410 return ret;
2411 }
2412
2413 int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag,
2414 bool readonly, bool need_lock)
2415 {
2416 struct super_block *sb = sbi->sb;
2417 int ret = 0;
2418
2419 switch (flag) {
2420 case F2FS_GOING_DOWN_FULLSYNC:
2421 ret = bdev_freeze(sb->s_bdev);
2422 if (ret)
2423 goto out;
2424 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
2425 bdev_thaw(sb->s_bdev);
2426 break;
2427 case F2FS_GOING_DOWN_METASYNC:
2428 /* do checkpoint only */
2429 ret = f2fs_sync_fs(sb, 1);
2430 if (ret) {
2431 if (ret == -EIO)
2432 ret = 0;
2433 goto out;
2434 }
2435 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
2436 break;
2437 case F2FS_GOING_DOWN_NOSYNC:
2438 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
2439 break;
2440 case F2FS_GOING_DOWN_METAFLUSH:
2441 f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
2442 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
2443 break;
2444 case F2FS_GOING_DOWN_NEED_FSCK:
2445 set_sbi_flag(sbi, SBI_NEED_FSCK);
2446 set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
2447 set_sbi_flag(sbi, SBI_IS_DIRTY);
2448 /* do checkpoint only */
2449 ret = f2fs_sync_fs(sb, 1);
2450 if (ret == -EIO)
2451 ret = 0;
2452 goto out;
2453 default:
2454 ret = -EINVAL;
2455 goto out;
2456 }
2457
2458 if (readonly)
2459 goto out;
2460
2461 /*
2462 * grab sb->s_umount to avoid racing w/ remount() and other shutdown
2463 * paths.
2464 */
2465 if (need_lock)
2466 down_write(&sbi->sb->s_umount);
2467
2468 f2fs_stop_gc_thread(sbi);
2469 f2fs_stop_discard_thread(sbi);
2470
2471 f2fs_drop_discard_cmd(sbi);
2472 clear_opt(sbi, DISCARD);
2473
2474 if (need_lock)
2475 up_write(&sbi->sb->s_umount);
2476
2477 f2fs_update_time(sbi, REQ_TIME);
2478 out:
2479
2480 trace_f2fs_shutdown(sbi, flag, ret);
2481
2482 return ret;
2483 }
2484
2485 static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
2486 {
2487 struct inode *inode = file_inode(filp);
2488 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2489 __u32 in;
2490 int ret;
2491 bool need_drop = false, readonly = false;
2492
2493 if (!capable(CAP_SYS_ADMIN))
2494 return -EPERM;
2495
2496 if (get_user(in, (__u32 __user *)arg))
2497 return -EFAULT;
2498
2499 if (in != F2FS_GOING_DOWN_FULLSYNC) {
2500 ret = mnt_want_write_file(filp);
2501 if (ret) {
2502 if (ret != -EROFS)
2503 return ret;
2504
2505 /* fallback to nosync shutdown for readonly fs */
2506 in = F2FS_GOING_DOWN_NOSYNC;
2507 readonly = true;
2508 } else {
2509 need_drop = true;
2510 }
2511 }
2512
2513 ret = f2fs_do_shutdown(sbi, in, readonly, true);
2514
2515 if (need_drop)
2516 mnt_drop_write_file(filp);
2517
2518 return ret;
2519 }
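
/*
 * Illustrative sketch only (not part of this file): requesting a
 * metadata-sync shutdown through the ioctl handled above. The fd is a
 * placeholder for any open file on the target filesystem.
 *
 *	__u32 flag = F2FS_GOING_DOWN_METASYNC;
 *
 *	ioctl(fd, F2FS_IOC_SHUTDOWN, &flag);
 */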
2520
2521 static int f2fs_keep_noreuse_range(struct inode *inode,
2522 loff_t offset, loff_t len)
2523 {
2524 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2525 u64 max_bytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
2526 u64 start, end;
2527 int ret = 0;
2528
2529 if (!S_ISREG(inode->i_mode))
2530 return 0;
2531
2532 if (offset >= max_bytes || len > max_bytes ||
2533 (offset + len) > max_bytes)
2534 return 0;
2535
2536 start = offset >> PAGE_SHIFT;
2537 end = DIV_ROUND_UP(offset + len, PAGE_SIZE);
2538
2539 inode_lock(inode);
2540 if (f2fs_is_atomic_file(inode)) {
2541 inode_unlock(inode);
2542 return 0;
2543 }
2544
2545 spin_lock(&sbi->inode_lock[DONATE_INODE]);
2546 /* let's remove the range if len == 0 */
2547 if (!len) {
2548 if (!list_empty(&F2FS_I(inode)->gdonate_list)) {
2549 list_del_init(&F2FS_I(inode)->gdonate_list);
2550 sbi->donate_files--;
2551 if (is_inode_flag_set(inode, FI_DONATE_FINISHED))
2552 ret = -EALREADY;
2553 else
2554 set_inode_flag(inode, FI_DONATE_FINISHED);
2555 } else
2556 ret = -ENOENT;
2557 } else {
2558 if (list_empty(&F2FS_I(inode)->gdonate_list)) {
2559 list_add_tail(&F2FS_I(inode)->gdonate_list,
2560 &sbi->inode_list[DONATE_INODE]);
2561 sbi->donate_files++;
2562 } else {
2563 list_move_tail(&F2FS_I(inode)->gdonate_list,
2564 &sbi->inode_list[DONATE_INODE]);
2565 }
2566 F2FS_I(inode)->donate_start = start;
2567 F2FS_I(inode)->donate_end = end - 1;
2568 clear_inode_flag(inode, FI_DONATE_FINISHED);
2569 }
2570 spin_unlock(&sbi->inode_lock[DONATE_INODE]);
2571 inode_unlock(inode);
2572
2573 return ret;
2574 }
2575
2576 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
2577 {
2578 struct inode *inode = file_inode(filp);
2579 struct super_block *sb = inode->i_sb;
2580 struct fstrim_range range;
2581 int ret;
2582
2583 if (!capable(CAP_SYS_ADMIN))
2584 return -EPERM;
2585
2586 if (!f2fs_hw_support_discard(F2FS_SB(sb)))
2587 return -EOPNOTSUPP;
2588
2589 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
2590 sizeof(range)))
2591 return -EFAULT;
2592
2593 ret = mnt_want_write_file(filp);
2594 if (ret)
2595 return ret;
2596
2597 range.minlen = max((unsigned int)range.minlen,
2598 bdev_discard_granularity(sb->s_bdev));
2599 ret = f2fs_trim_fs(F2FS_SB(sb), &range);
2600 mnt_drop_write_file(filp);
2601 if (ret < 0)
2602 return ret;
2603
2604 if (copy_to_user((struct fstrim_range __user *)arg, &range,
2605 sizeof(range)))
2606 return -EFAULT;
2607 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2608 return 0;
2609 }
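
/*
 * Illustrative sketch only (not part of this file): trimming the whole
 * filesystem with a 1MB minimum extent via the FITRIM path above. On
 * return, range.len reflects the number of bytes trimmed.
 *
 *	struct fstrim_range range = {
 *		.start = 0,
 *		.len = ULLONG_MAX,
 *		.minlen = 1024 * 1024,
 *	};
 *
 *	ioctl(fd, FITRIM, &range);
 */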
2610
2611 static bool uuid_is_nonzero(__u8 u[16])
2612 {
2613 int i;
2614
2615 for (i = 0; i < 16; i++)
2616 if (u[i])
2617 return true;
2618 return false;
2619 }
2620
2621 static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
2622 {
2623 struct inode *inode = file_inode(filp);
2624 int ret;
2625
2626 if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
2627 return -EOPNOTSUPP;
2628
2629 ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
2630 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2631 return ret;
2632 }
2633
2634 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
2635 {
2636 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2637 return -EOPNOTSUPP;
2638 return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
2639 }
2640
2641 static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
2642 {
2643 struct inode *inode = file_inode(filp);
2644 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2645 u8 encrypt_pw_salt[16];
2646 int err;
2647
2648 if (!f2fs_sb_has_encrypt(sbi))
2649 return -EOPNOTSUPP;
2650
2651 err = mnt_want_write_file(filp);
2652 if (err)
2653 return err;
2654
2655 f2fs_down_write(&sbi->sb_lock);
2656
2657 if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
2658 goto got_it;
2659
2660 /* update superblock with uuid */
2661 generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
2662
2663 err = f2fs_commit_super(sbi, false);
2664 if (err) {
2665 /* undo new data */
2666 memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
2667 goto out_err;
2668 }
2669 got_it:
2670 memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16);
2671 out_err:
2672 f2fs_up_write(&sbi->sb_lock);
2673 mnt_drop_write_file(filp);
2674
2675 if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16))
2676 err = -EFAULT;
2677
2678 return err;
2679 }
2680
2681 static int f2fs_ioc_get_encryption_policy_ex(struct file *filp,
2682 unsigned long arg)
2683 {
2684 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2685 return -EOPNOTSUPP;
2686
2687 return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg);
2688 }
2689
2690 static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg)
2691 {
2692 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2693 return -EOPNOTSUPP;
2694
2695 return fscrypt_ioctl_add_key(filp, (void __user *)arg);
2696 }
2697
2698 static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg)
2699 {
2700 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2701 return -EOPNOTSUPP;
2702
2703 return fscrypt_ioctl_remove_key(filp, (void __user *)arg);
2704 }
2705
2706 static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp,
2707 unsigned long arg)
2708 {
2709 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2710 return -EOPNOTSUPP;
2711
2712 return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg);
2713 }
2714
2715 static int f2fs_ioc_get_encryption_key_status(struct file *filp,
2716 unsigned long arg)
2717 {
2718 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2719 return -EOPNOTSUPP;
2720
2721 return fscrypt_ioctl_get_key_status(filp, (void __user *)arg);
2722 }
2723
2724 static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg)
2725 {
2726 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2727 return -EOPNOTSUPP;
2728
2729 return fscrypt_ioctl_get_nonce(filp, (void __user *)arg);
2730 }
2731
2732 static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
2733 {
2734 struct inode *inode = file_inode(filp);
2735 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2736 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
2737 .no_bg_gc = false,
2738 .should_migrate_blocks = false,
2739 .nr_free_secs = 0 };
2740 __u32 sync;
2741 int ret;
2742
2743 if (!capable(CAP_SYS_ADMIN))
2744 return -EPERM;
2745
2746 if (get_user(sync, (__u32 __user *)arg))
2747 return -EFAULT;
2748
2749 if (f2fs_readonly(sbi->sb))
2750 return -EROFS;
2751
2752 ret = mnt_want_write_file(filp);
2753 if (ret)
2754 return ret;
2755
2756 if (!sync) {
2757 if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
2758 ret = -EBUSY;
2759 goto out;
2760 }
2761 } else {
2762 f2fs_down_write(&sbi->gc_lock);
2763 }
2764
2765 gc_control.init_gc_type = sync ? FG_GC : BG_GC;
2766 gc_control.err_gc_skipped = sync;
2767 stat_inc_gc_call_count(sbi, FOREGROUND);
2768 ret = f2fs_gc(sbi, &gc_control);
2769 out:
2770 mnt_drop_write_file(filp);
2771 return ret;
2772 }
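
/*
 * Illustrative sketch only (not part of this file): triggering one
 * synchronous foreground GC pass through the ioctl above. With sync = 0
 * a background pass is attempted instead, and -EBUSY is returned if
 * gc_lock is contended.
 *
 *	__u32 sync = 1;
 *
 *	ioctl(fd, F2FS_IOC_GARBAGE_COLLECT, &sync);
 */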
2773
2774 static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
2775 {
2776 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
2777 struct f2fs_gc_control gc_control = {
2778 .init_gc_type = range->sync ? FG_GC : BG_GC,
2779 .no_bg_gc = false,
2780 .should_migrate_blocks = false,
2781 .err_gc_skipped = range->sync,
2782 .nr_free_secs = 0 };
2783 u64 end;
2784 int ret;
2785
2786 if (!capable(CAP_SYS_ADMIN))
2787 return -EPERM;
2788 if (f2fs_readonly(sbi->sb))
2789 return -EROFS;
2790
2791 end = range->start + range->len;
2792 if (end < range->start || range->start < MAIN_BLKADDR(sbi) ||
2793 end >= MAX_BLKADDR(sbi))
2794 return -EINVAL;
2795
2796 ret = mnt_want_write_file(filp);
2797 if (ret)
2798 return ret;
2799
2800 do_more:
2801 if (!range->sync) {
2802 if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
2803 ret = -EBUSY;
2804 goto out;
2805 }
2806 } else {
2807 f2fs_down_write(&sbi->gc_lock);
2808 }
2809
2810 gc_control.victim_segno = GET_SEGNO(sbi, range->start);
2811 stat_inc_gc_call_count(sbi, FOREGROUND);
2812 ret = f2fs_gc(sbi, &gc_control);
2813 if (ret) {
2814 if (ret == -EBUSY)
2815 ret = -EAGAIN;
2816 goto out;
2817 }
2818 range->start += CAP_BLKS_PER_SEC(sbi);
2819 if (range->start <= end)
2820 goto do_more;
2821 out:
2822 mnt_drop_write_file(filp);
2823 return ret;
2824 }
2825
2826 static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
2827 {
2828 struct f2fs_gc_range range;
2829
2830 if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
2831 sizeof(range)))
2832 return -EFAULT;
2833 return __f2fs_ioc_gc_range(filp, &range);
2834 }
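
/*
 * Illustrative sketch only (not part of this file): synchronous GC over
 * a block-address range through the ioctl above. start_blkaddr and
 * nr_blocks are placeholders and must fall within the main area.
 *
 *	struct f2fs_gc_range range = {
 *		.sync = 1,
 *		.start = start_blkaddr,
 *		.len = nr_blocks,
 *	};
 *
 *	ioctl(fd, F2FS_IOC_GARBAGE_COLLECT_RANGE, &range);
 */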
2835
2836 static int f2fs_ioc_write_checkpoint(struct file *filp)
2837 {
2838 struct inode *inode = file_inode(filp);
2839 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2840 int ret;
2841
2842 if (!capable(CAP_SYS_ADMIN))
2843 return -EPERM;
2844
2845 if (f2fs_readonly(sbi->sb))
2846 return -EROFS;
2847
2848 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2849 f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled.");
2850 return -EINVAL;
2851 }
2852
2853 ret = mnt_want_write_file(filp);
2854 if (ret)
2855 return ret;
2856
2857 ret = f2fs_sync_fs(sbi->sb, 1);
2858
2859 mnt_drop_write_file(filp);
2860 return ret;
2861 }
2862
2863 static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
2864 struct file *filp,
2865 struct f2fs_defragment *range)
2866 {
2867 struct inode *inode = file_inode(filp);
2868 struct f2fs_map_blocks map = { .m_next_extent = NULL,
2869 .m_seg_type = NO_CHECK_TYPE,
2870 .m_may_create = false };
2871 struct extent_info ei = {};
2872 pgoff_t pg_start, pg_end, next_pgofs;
2873 unsigned int total = 0, sec_num;
2874 block_t blk_end = 0;
2875 bool fragmented = false;
2876 int err;
2877
2878 f2fs_balance_fs(sbi, true);
2879
2880 inode_lock(inode);
2881 pg_start = range->start >> PAGE_SHIFT;
2882 pg_end = min_t(pgoff_t,
2883 (range->start + range->len) >> PAGE_SHIFT,
2884 DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE));
2885
2886 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) ||
2887 f2fs_is_atomic_file(inode)) {
2888 err = -EINVAL;
2889 goto unlock_out;
2890 }
2891
2892 /* if in-place-update policy is enabled, don't waste time here */
2893 set_inode_flag(inode, FI_OPU_WRITE);
2894 if (f2fs_should_update_inplace(inode, NULL)) {
2895 err = -EINVAL;
2896 goto out;
2897 }
2898
2899 /* writeback all dirty pages in the range */
2900 err = filemap_write_and_wait_range(inode->i_mapping,
2901 pg_start << PAGE_SHIFT,
2902 (pg_end << PAGE_SHIFT) - 1);
2903 if (err)
2904 goto out;
2905
2906 /*
2907 * look up mapping info in the extent cache; skip defragmenting if the
2908 * physical block addresses are contiguous.
2909 */
2910 if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
2911 if ((pgoff_t)ei.fofs + ei.len >= pg_end)
2912 goto out;
2913 }
2914
2915 map.m_lblk = pg_start;
2916 map.m_next_pgofs = &next_pgofs;
2917
2918 /*
2919 * look up mapping info in the dnode page cache; skip defragmenting if
2920 * all physical block addresses are contiguous, even if there are holes
2921 * in the logical blocks.
2922 */
2923 while (map.m_lblk < pg_end) {
2924 map.m_len = pg_end - map.m_lblk;
2925 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
2926 if (err)
2927 goto out;
2928
2929 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
2930 map.m_lblk = next_pgofs;
2931 continue;
2932 }
2933
2934 if (blk_end && blk_end != map.m_pblk)
2935 fragmented = true;
2936
2937 /* record the total count of blocks that we're going to move */
2938 total += map.m_len;
2939
2940 blk_end = map.m_pblk + map.m_len;
2941
2942 map.m_lblk += map.m_len;
2943 }
2944
2945 if (!fragmented) {
2946 total = 0;
2947 goto out;
2948 }
2949
2950 sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi));
2951
2952 /*
2953 * make sure there are enough free sections for LFS allocation; this
2954 * avoids defragmentation running in SSR mode when free sections are
2955 * being allocated intensively
2956 */
2957 if (has_not_enough_free_secs(sbi, 0, sec_num)) {
2958 err = -EAGAIN;
2959 goto out;
2960 }
2961
2962 map.m_lblk = pg_start;
2963 map.m_len = pg_end - pg_start;
2964 total = 0;
2965
2966 while (map.m_lblk < pg_end) {
2967 pgoff_t idx;
2968 int cnt = 0;
2969
2970 do_map:
2971 map.m_len = pg_end - map.m_lblk;
2972 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
2973 if (err)
2974 goto clear_out;
2975
2976 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
2977 map.m_lblk = next_pgofs;
2978 goto check;
2979 }
2980
2981 set_inode_flag(inode, FI_SKIP_WRITES);
2982
2983 idx = map.m_lblk;
2984 while (idx < map.m_lblk + map.m_len &&
2985 cnt < BLKS_PER_SEG(sbi)) {
2986 struct folio *folio;
2987
2988 folio = f2fs_get_lock_data_folio(inode, idx, true);
2989 if (IS_ERR(folio)) {
2990 err = PTR_ERR(folio);
2991 goto clear_out;
2992 }
2993
2994 f2fs_folio_wait_writeback(folio, DATA, true, true);
2995
2996 folio_mark_dirty(folio);
2997 folio_set_f2fs_gcing(folio);
2998 f2fs_folio_put(folio, true);
2999
3000 idx++;
3001 cnt++;
3002 total++;
3003 }
3004
3005 map.m_lblk = idx;
3006 check:
3007 if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi))
3008 goto do_map;
3009
3010 clear_inode_flag(inode, FI_SKIP_WRITES);
3011
3012 err = filemap_fdatawrite(inode->i_mapping);
3013 if (err)
3014 goto out;
3015 }
3016 clear_out:
3017 clear_inode_flag(inode, FI_SKIP_WRITES);
3018 out:
3019 clear_inode_flag(inode, FI_OPU_WRITE);
3020 unlock_out:
3021 inode_unlock(inode);
3022 if (!err)
3023 range->len = (u64)total << PAGE_SHIFT;
3024 return err;
3025 }
3026
3027 static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
3028 {
3029 struct inode *inode = file_inode(filp);
3030 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3031 struct f2fs_defragment range;
3032 int err;
3033
3034 if (!capable(CAP_SYS_ADMIN))
3035 return -EPERM;
3036
3037 if (!S_ISREG(inode->i_mode))
3038 return -EINVAL;
3039
3040 if (f2fs_readonly(sbi->sb))
3041 return -EROFS;
3042
3043 if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
3044 sizeof(range)))
3045 return -EFAULT;
3046
3047 /* verify alignment of offset & size */
3048 if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1))
3049 return -EINVAL;
3050
3051 if (unlikely((range.start + range.len) >> PAGE_SHIFT >
3052 max_file_blocks(inode)))
3053 return -EINVAL;
3054
3055 err = mnt_want_write_file(filp);
3056 if (err)
3057 return err;
3058
3059 err = f2fs_defragment_range(sbi, filp, &range);
3060 mnt_drop_write_file(filp);
3061
3062 if (range.len)
3063 f2fs_update_time(sbi, REQ_TIME);
3064 if (err < 0)
3065 return err;
3066
3067 if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
3068 sizeof(range)))
3069 return -EFAULT;
3070
3071 return 0;
3072 }
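
/*
 * Illustrative sketch only (not part of this file): defragmenting the
 * first 64MB of a file via the ioctl above. Both fields must be
 * block-size aligned; on return, range.len holds the number of bytes
 * actually scheduled for movement.
 *
 *	struct f2fs_defragment range = {
 *		.start = 0,
 *		.len = 64 * 1024 * 1024,
 *	};
 *
 *	ioctl(fd, F2FS_IOC_DEFRAGMENT, &range);
 */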
3073
3074 static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
3075 struct file *file_out, loff_t pos_out, size_t len)
3076 {
3077 struct inode *src = file_inode(file_in);
3078 struct inode *dst = file_inode(file_out);
3079 struct f2fs_sb_info *sbi = F2FS_I_SB(src);
3080 size_t olen = len, dst_max_i_size = 0;
3081 size_t dst_osize;
3082 int ret;
3083
3084 if (file_in->f_path.mnt != file_out->f_path.mnt ||
3085 src->i_sb != dst->i_sb)
3086 return -EXDEV;
3087
3088 if (unlikely(f2fs_readonly(src->i_sb)))
3089 return -EROFS;
3090
3091 if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
3092 return -EINVAL;
3093
3094 if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst))
3095 return -EOPNOTSUPP;
3096
3097 if (pos_out < 0 || pos_in < 0)
3098 return -EINVAL;
3099
3100 if (src == dst) {
3101 if (pos_in == pos_out)
3102 return 0;
3103 if (pos_out > pos_in && pos_out < pos_in + len)
3104 return -EINVAL;
3105 }
3106
3107 inode_lock(src);
3108 if (src != dst) {
3109 ret = -EBUSY;
3110 if (!inode_trylock(dst))
3111 goto out;
3112 }
3113
3114 if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) ||
3115 f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) {
3116 ret = -EOPNOTSUPP;
3117 goto out_unlock;
3118 }
3119
3120 if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) {
3121 ret = -EINVAL;
3122 goto out_unlock;
3123 }
3124
3125 ret = -EINVAL;
3126 if (pos_in + len > src->i_size || pos_in + len < pos_in)
3127 goto out_unlock;
3128 if (len == 0)
3129 olen = len = src->i_size - pos_in;
3130 if (pos_in + len == src->i_size)
3131 len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in;
3132 if (len == 0) {
3133 ret = 0;
3134 goto out_unlock;
3135 }
3136
3137 dst_osize = dst->i_size;
3138 if (pos_out + olen > dst->i_size)
3139 dst_max_i_size = pos_out + olen;
3140
3141 /* verify the end result is block aligned */
3142 if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) ||
3143 !IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) ||
3144 !IS_ALIGNED(pos_out, F2FS_BLKSIZE))
3145 goto out_unlock;
3146
3147 ret = f2fs_convert_inline_inode(src);
3148 if (ret)
3149 goto out_unlock;
3150
3151 ret = f2fs_convert_inline_inode(dst);
3152 if (ret)
3153 goto out_unlock;
3154
3155 /* write out all dirty pages from offset */
3156 ret = filemap_write_and_wait_range(src->i_mapping,
3157 pos_in, pos_in + len);
3158 if (ret)
3159 goto out_unlock;
3160
3161 ret = filemap_write_and_wait_range(dst->i_mapping,
3162 pos_out, pos_out + len);
3163 if (ret)
3164 goto out_unlock;
3165
3166 f2fs_balance_fs(sbi, true);
3167
3168 f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
3169 if (src != dst) {
3170 ret = -EBUSY;
3171 if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
3172 goto out_src;
3173 }
3174
3175 f2fs_lock_op(sbi);
3176 ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in),
3177 F2FS_BYTES_TO_BLK(pos_out),
3178 F2FS_BYTES_TO_BLK(len), false);
3179
3180 if (!ret) {
3181 if (dst_max_i_size)
3182 f2fs_i_size_write(dst, dst_max_i_size);
3183 else if (dst_osize != dst->i_size)
3184 f2fs_i_size_write(dst, dst_osize);
3185 }
3186 f2fs_unlock_op(sbi);
3187
3188 if (src != dst)
3189 f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
3190 out_src:
3191 f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
3192 if (ret)
3193 goto out_unlock;
3194
3195 inode_set_mtime_to_ts(src, inode_set_ctime_current(src));
3196 f2fs_mark_inode_dirty_sync(src, false);
3197 if (src != dst) {
3198 inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst));
3199 f2fs_mark_inode_dirty_sync(dst, false);
3200 }
3201 f2fs_update_time(sbi, REQ_TIME);
3202
3203 out_unlock:
3204 if (src != dst)
3205 inode_unlock(dst);
3206 out:
3207 inode_unlock(src);
3208 return ret;
3209 }
3210
3211 static int __f2fs_ioc_move_range(struct file *filp,
3212 struct f2fs_move_range *range)
3213 {
3214 int err;
3215
3216 if (!(filp->f_mode & FMODE_READ) ||
3217 !(filp->f_mode & FMODE_WRITE))
3218 return -EBADF;
3219
3220 CLASS(fd, dst)(range->dst_fd);
3221 if (fd_empty(dst))
3222 return -EBADF;
3223
3224 if (!(fd_file(dst)->f_mode & FMODE_WRITE))
3225 return -EBADF;
3226
3227 err = mnt_want_write_file(filp);
3228 if (err)
3229 return err;
3230
3231 err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst),
3232 range->pos_out, range->len);
3233
3234 mnt_drop_write_file(filp);
3235 return err;
3236 }
3237
3238 static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
3239 {
3240 struct f2fs_move_range range;
3241
3242 if (copy_from_user(&range, (struct f2fs_move_range __user *)arg,
3243 sizeof(range)))
3244 return -EFAULT;
3245 return __f2fs_ioc_move_range(filp, &range);
3246 }
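
/*
 * Illustrative sketch only (not part of this file): exchanging a
 * block-aligned range between two files on the same f2fs mount via the
 * ioctl above. src_fd, dst_fd, offsets and length are placeholders.
 *
 *	struct f2fs_move_range range = {
 *		.dst_fd = dst_fd,
 *		.pos_in = 0,
 *		.pos_out = 0,
 *		.len = 1024 * 1024,
 *	};
 *
 *	ioctl(src_fd, F2FS_IOC_MOVE_RANGE, &range);
 */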
3247
3248 static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
3249 {
3250 struct inode *inode = file_inode(filp);
3251 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3252 struct sit_info *sm = SIT_I(sbi);
3253 unsigned int start_segno = 0, end_segno = 0;
3254 unsigned int dev_start_segno = 0, dev_end_segno = 0;
3255 struct f2fs_flush_device range;
3256 struct f2fs_gc_control gc_control = {
3257 .init_gc_type = FG_GC,
3258 .should_migrate_blocks = true,
3259 .err_gc_skipped = true,
3260 .nr_free_secs = 0 };
3261 int ret;
3262
3263 if (!capable(CAP_SYS_ADMIN))
3264 return -EPERM;
3265
3266 if (f2fs_readonly(sbi->sb))
3267 return -EROFS;
3268
3269 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
3270 return -EINVAL;
3271
3272 if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
3273 sizeof(range)))
3274 return -EFAULT;
3275
3276 if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
3277 __is_large_section(sbi)) {
3278 f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1",
3279 range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi));
3280 return -EINVAL;
3281 }
3282
3283 ret = mnt_want_write_file(filp);
3284 if (ret)
3285 return ret;
3286
3287 if (range.dev_num != 0)
3288 dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk);
3289 dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk);
3290
3291 start_segno = sm->last_victim[FLUSH_DEVICE];
3292 if (start_segno < dev_start_segno || start_segno >= dev_end_segno)
3293 start_segno = dev_start_segno;
3294 end_segno = min(start_segno + range.segments, dev_end_segno);
3295
3296 while (start_segno < end_segno) {
3297 if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
3298 ret = -EBUSY;
3299 goto out;
3300 }
3301 sm->last_victim[GC_CB] = end_segno + 1;
3302 sm->last_victim[GC_GREEDY] = end_segno + 1;
3303 sm->last_victim[ALLOC_NEXT] = end_segno + 1;
3304
3305 gc_control.victim_segno = start_segno;
3306 stat_inc_gc_call_count(sbi, FOREGROUND);
3307 ret = f2fs_gc(sbi, &gc_control);
3308 if (ret == -EAGAIN)
3309 ret = 0;
3310 else if (ret < 0)
3311 break;
3312 start_segno++;
3313 }
3314 out:
3315 mnt_drop_write_file(filp);
3316 return ret;
3317 }
3318
3319 static int f2fs_ioc_get_features(struct file *filp, unsigned long arg)
3320 {
3321 struct inode *inode = file_inode(filp);
3322 u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature);
3323
3324 /* Must validate to set it with SQLite behavior in Android. */
3325 sb_feature |= F2FS_FEATURE_ATOMIC_WRITE;
3326
3327 return put_user(sb_feature, (u32 __user *)arg);
3328 }
3329
3330 #ifdef CONFIG_QUOTA
3331 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
3332 {
3333 struct dquot *transfer_to[MAXQUOTAS] = {};
3334 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3335 struct super_block *sb = sbi->sb;
3336 int err;
3337
3338 transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
3339 if (IS_ERR(transfer_to[PRJQUOTA]))
3340 return PTR_ERR(transfer_to[PRJQUOTA]);
3341
3342 err = __dquot_transfer(inode, transfer_to);
3343 if (err)
3344 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
3345 dqput(transfer_to[PRJQUOTA]);
3346 return err;
3347 }
3348
3349 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
3350 {
3351 struct f2fs_inode_info *fi = F2FS_I(inode);
3352 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3353 struct f2fs_inode *ri = NULL;
3354 kprojid_t kprojid;
3355 int err;
3356
3357 if (!f2fs_sb_has_project_quota(sbi)) {
3358 if (projid != F2FS_DEF_PROJID)
3359 return -EOPNOTSUPP;
3360 else
3361 return 0;
3362 }
3363
3364 if (!f2fs_has_extra_attr(inode))
3365 return -EOPNOTSUPP;
3366
3367 kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
3368
3369 if (projid_eq(kprojid, fi->i_projid))
3370 return 0;
3371
3372 err = -EPERM;
3373 /* Is it quota file? Do not allow user to mess with it */
3374 if (IS_NOQUOTA(inode))
3375 return err;
3376
3377 if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
3378 return -EOVERFLOW;
3379
3380 err = f2fs_dquot_initialize(inode);
3381 if (err)
3382 return err;
3383
3384 f2fs_lock_op(sbi);
3385 err = f2fs_transfer_project_quota(inode, kprojid);
3386 if (err)
3387 goto out_unlock;
3388
3389 fi->i_projid = kprojid;
3390 inode_set_ctime_current(inode);
3391 f2fs_mark_inode_dirty_sync(inode, true);
3392 out_unlock:
3393 f2fs_unlock_op(sbi);
3394 return err;
3395 }
3396 #else
3397 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
3398 {
3399 return 0;
3400 }
3401
3402 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
3403 {
3404 if (projid != F2FS_DEF_PROJID)
3405 return -EOPNOTSUPP;
3406 return 0;
3407 }
3408 #endif
3409
3410 int f2fs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
3411 {
3412 struct inode *inode = d_inode(dentry);
3413 struct f2fs_inode_info *fi = F2FS_I(inode);
3414 u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags);
3415
3416 if (IS_ENCRYPTED(inode))
3417 fsflags |= FS_ENCRYPT_FL;
3418 if (IS_VERITY(inode))
3419 fsflags |= FS_VERITY_FL;
3420 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
3421 fsflags |= FS_INLINE_DATA_FL;
3422 if (is_inode_flag_set(inode, FI_PIN_FILE))
3423 fsflags |= FS_NOCOW_FL;
3424
3425 fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL);
3426
3427 if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)))
3428 fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid);
3429
3430 return 0;
3431 }
3432
3433 int f2fs_fileattr_set(struct mnt_idmap *idmap,
3434 struct dentry *dentry, struct file_kattr *fa)
3435 {
3436 struct inode *inode = d_inode(dentry);
3437 u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL;
3438 u32 iflags;
3439 int err;
3440
3441 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
3442 return -EIO;
3443 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
3444 return -ENOSPC;
3445 if (fsflags & ~F2FS_GETTABLE_FS_FL)
3446 return -EOPNOTSUPP;
3447 fsflags &= F2FS_SETTABLE_FS_FL;
3448 if (!fa->flags_valid)
3449 mask &= FS_COMMON_FL;
3450
3451 iflags = f2fs_fsflags_to_iflags(fsflags);
3452 if (f2fs_mask_flags(inode->i_mode, iflags) != iflags)
3453 return -EOPNOTSUPP;
3454
3455 err = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(mask));
3456 if (!err)
3457 err = f2fs_ioc_setproject(inode, fa->fsx_projid);
3458
3459 return err;
3460 }
3461
3462 int f2fs_pin_file_control(struct inode *inode, bool inc)
3463 {
3464 struct f2fs_inode_info *fi = F2FS_I(inode);
3465 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3466
3467 if (IS_DEVICE_ALIASING(inode))
3468 return -EINVAL;
3469
3470 if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) {
3471 f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials",
3472 __func__, inode->i_ino, fi->i_gc_failures);
3473 clear_inode_flag(inode, FI_PIN_FILE);
3474 return -EAGAIN;
3475 }
3476
3477 /* Use i_gc_failures for normal file as a risk signal. */
3478 if (inc)
3479 f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1);
3480
3481 return 0;
3482 }
3483
3484 static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
3485 {
3486 struct inode *inode = file_inode(filp);
3487 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3488 __u32 pin;
3489 int ret = 0;
3490
3491 if (get_user(pin, (__u32 __user *)arg))
3492 return -EFAULT;
3493
3494 if (!S_ISREG(inode->i_mode))
3495 return -EINVAL;
3496
3497 if (f2fs_readonly(sbi->sb))
3498 return -EROFS;
3499
3500 if (!pin && IS_DEVICE_ALIASING(inode))
3501 return -EOPNOTSUPP;
3502
3503 ret = mnt_want_write_file(filp);
3504 if (ret)
3505 return ret;
3506
3507 inode_lock(inode);
3508
3509 if (f2fs_is_atomic_file(inode)) {
3510 ret = -EINVAL;
3511 goto out;
3512 }
3513
3514 if (!pin) {
3515 clear_inode_flag(inode, FI_PIN_FILE);
3516 f2fs_i_gc_failures_write(inode, 0);
3517 goto done;
3518 } else if (f2fs_is_pinned_file(inode)) {
3519 goto done;
3520 }
3521
3522 if (F2FS_HAS_BLOCKS(inode)) {
3523 ret = -EFBIG;
3524 goto out;
3525 }
3526
3527 /* Let's allow file pinning on zoned device. */
3528 if (!f2fs_sb_has_blkzoned(sbi) &&
3529 f2fs_should_update_outplace(inode, NULL)) {
3530 ret = -EINVAL;
3531 goto out;
3532 }
3533
3534 if (f2fs_pin_file_control(inode, false)) {
3535 ret = -EAGAIN;
3536 goto out;
3537 }
3538
3539 ret = f2fs_convert_inline_inode(inode);
3540 if (ret)
3541 goto out;
3542
3543 if (!f2fs_disable_compressed_file(inode)) {
3544 ret = -EOPNOTSUPP;
3545 goto out;
3546 }
3547
3548 set_inode_flag(inode, FI_PIN_FILE);
3549 ret = F2FS_I(inode)->i_gc_failures;
3550 done:
3551 f2fs_update_time(sbi, REQ_TIME);
3552 out:
3553 inode_unlock(inode);
3554 mnt_drop_write_file(filp);
3555 return ret;
3556 }
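
/*
 * Illustrative sketch only (not part of this file): pinning a newly
 * created (still empty) file so its blocks are excluded from GC
 * migration, then reading back the pin state. The fd is a placeholder.
 *
 *	__u32 pin = 1, failures;
 *
 *	ioctl(fd, F2FS_IOC_SET_PIN_FILE, &pin);
 *	ioctl(fd, F2FS_IOC_GET_PIN_FILE, &failures);
 */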
3557
3558 static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
3559 {
3560 struct inode *inode = file_inode(filp);
3561 __u32 pin = 0;
3562
3563 if (is_inode_flag_set(inode, FI_PIN_FILE))
3564 pin = F2FS_I(inode)->i_gc_failures;
3565 return put_user(pin, (u32 __user *)arg);
3566 }
3567
3568 static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg)
3569 {
3570 return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 1 : 0,
3571 (u32 __user *)arg);
3572 }
3573
3574 static int f2fs_ioc_io_prio(struct file *filp, unsigned long arg)
3575 {
3576 struct inode *inode = file_inode(filp);
3577 __u32 level;
3578
3579 if (get_user(level, (__u32 __user *)arg))
3580 return -EFAULT;
3581
3582 if (!S_ISREG(inode->i_mode) || level >= F2FS_IOPRIO_MAX)
3583 return -EINVAL;
3584
3585 inode_lock(inode);
3586 F2FS_I(inode)->ioprio_hint = level;
3587 inode_unlock(inode);
3588 return 0;
3589 }
3590
3591 int f2fs_precache_extents(struct inode *inode)
3592 {
3593 struct f2fs_inode_info *fi = F2FS_I(inode);
3594 struct f2fs_map_blocks map;
3595 pgoff_t m_next_extent;
3596 loff_t end;
3597 int err;
3598
3599 if (is_inode_flag_set(inode, FI_NO_EXTENT))
3600 return -EOPNOTSUPP;
3601
3602 map.m_lblk = 0;
3603 map.m_pblk = 0;
3604 map.m_next_pgofs = NULL;
3605 map.m_next_extent = &m_next_extent;
3606 map.m_seg_type = NO_CHECK_TYPE;
3607 map.m_may_create = false;
3608 end = F2FS_BLK_ALIGN(i_size_read(inode));
3609
3610 while (map.m_lblk < end) {
3611 map.m_len = end - map.m_lblk;
3612
3613 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
3614 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE);
3615 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
3616 if (err || !map.m_len)
3617 return err;
3618
3619 map.m_lblk = m_next_extent;
3620 }
3621
3622 return 0;
3623 }
3624
3625 static int f2fs_ioc_precache_extents(struct file *filp)
3626 {
3627 return f2fs_precache_extents(file_inode(filp));
3628 }
3629
3630 static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
3631 {
3632 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
3633 __u64 block_count;
3634
3635 if (!capable(CAP_SYS_ADMIN))
3636 return -EPERM;
3637
3638 if (f2fs_readonly(sbi->sb))
3639 return -EROFS;
3640
3641 if (copy_from_user(&block_count, (void __user *)arg,
3642 sizeof(block_count)))
3643 return -EFAULT;
3644
3645 return f2fs_resize_fs(filp, block_count);
3646 }
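
/*
 * Illustrative sketch only (not part of this file): resizing the
 * filesystem to a new total block count via the ioctl above;
 * new_total_blocks is a placeholder.
 *
 *	__u64 block_count = new_total_blocks;
 *
 *	ioctl(fd, F2FS_IOC_RESIZE_FS, &block_count);
 */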
3647
3648 static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
3649 {
3650 struct inode *inode = file_inode(filp);
3651
3652 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3653
3654 if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) {
3655 f2fs_warn(F2FS_I_SB(inode),
3656 "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem",
3657 inode->i_ino);
3658 return -EOPNOTSUPP;
3659 }
3660
3661 return fsverity_ioctl_enable(filp, (const void __user *)arg);
3662 }
3663
3664 static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg)
3665 {
3666 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
3667 return -EOPNOTSUPP;
3668
3669 return fsverity_ioctl_measure(filp, (void __user *)arg);
3670 }
3671
3672 static int f2fs_ioc_read_verity_metadata(struct file *filp, unsigned long arg)
3673 {
3674 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
3675 return -EOPNOTSUPP;
3676
3677 return fsverity_ioctl_read_metadata(filp, (const void __user *)arg);
3678 }
3679
3680 static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg)
3681 {
3682 struct inode *inode = file_inode(filp);
3683 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3684 char *vbuf;
3685 int count;
3686 int err = 0;
3687
3688 vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL);
3689 if (!vbuf)
3690 return -ENOMEM;
3691
3692 f2fs_down_read(&sbi->sb_lock);
3693 count = utf16s_to_utf8s(sbi->raw_super->volume_name,
3694 ARRAY_SIZE(sbi->raw_super->volume_name),
3695 UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME);
3696 f2fs_up_read(&sbi->sb_lock);
3697
3698 if (copy_to_user((char __user *)arg, vbuf,
3699 min(FSLABEL_MAX, count)))
3700 err = -EFAULT;
3701
3702 kfree(vbuf);
3703 return err;
3704 }
3705
3706 static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg)
3707 {
3708 struct inode *inode = file_inode(filp);
3709 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3710 char *vbuf;
3711 int err = 0;
3712
3713 if (!capable(CAP_SYS_ADMIN))
3714 return -EPERM;
3715
3716 vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX);
3717 if (IS_ERR(vbuf))
3718 return PTR_ERR(vbuf);
3719
3720 err = mnt_want_write_file(filp);
3721 if (err)
3722 goto out;
3723
3724 f2fs_down_write(&sbi->sb_lock);
3725
3726 memset(sbi->raw_super->volume_name, 0,
3727 sizeof(sbi->raw_super->volume_name));
3728 utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN,
3729 sbi->raw_super->volume_name,
3730 ARRAY_SIZE(sbi->raw_super->volume_name));
3731
3732 err = f2fs_commit_super(sbi, false);
3733
3734 f2fs_up_write(&sbi->sb_lock);
3735
3736 mnt_drop_write_file(filp);
3737 out:
3738 kfree(vbuf);
3739 return err;
3740 }
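
/*
 * Illustrative sketch only (not part of this file): reading and updating
 * the volume label through the generic label ioctls handled above.
 *
 *	char label[FSLABEL_MAX];
 *
 *	ioctl(fd, FS_IOC_GETFSLABEL, label);
 *	ioctl(fd, FS_IOC_SETFSLABEL, "new-label");
 */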
3741
3742 static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks)
3743 {
3744 if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
3745 return -EOPNOTSUPP;
3746
3747 if (!f2fs_compressed_file(inode))
3748 return -EINVAL;
3749
3750 *blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);
3751
3752 return 0;
3753 }
3754
3755 static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg)
3756 {
3757 struct inode *inode = file_inode(filp);
3758 __u64 blocks;
3759 int ret;
3760
3761 ret = f2fs_get_compress_blocks(inode, &blocks);
3762 if (ret < 0)
3763 return ret;
3764
3765 return put_user(blocks, (u64 __user *)arg);
3766 }
3767
3768 static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
3769 {
3770 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
3771 unsigned int released_blocks = 0;
3772 int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
3773 block_t blkaddr;
3774 int i;
3775
3776 for (i = 0; i < count; i++) {
3777 blkaddr = data_blkaddr(dn->inode, dn->node_folio,
3778 dn->ofs_in_node + i);
3779
3780 if (!__is_valid_data_blkaddr(blkaddr))
3781 continue;
3782 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
3783 DATA_GENERIC_ENHANCE)))
3784 return -EFSCORRUPTED;
3785 }
3786
3787 while (count) {
3788 int compr_blocks = 0;
3789
3790 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
3791 blkaddr = f2fs_data_blkaddr(dn);
3792
3793 if (i == 0) {
3794 if (blkaddr == COMPRESS_ADDR)
3795 continue;
3796 dn->ofs_in_node += cluster_size;
3797 goto next;
3798 }
3799
3800 if (__is_valid_data_blkaddr(blkaddr))
3801 compr_blocks++;
3802
3803 if (blkaddr != NEW_ADDR)
3804 continue;
3805
3806 f2fs_set_data_blkaddr(dn, NULL_ADDR);
3807 }
3808
3809 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false);
3810 dec_valid_block_count(sbi, dn->inode,
3811 cluster_size - compr_blocks);
3812
3813 released_blocks += cluster_size - compr_blocks;
3814 next:
3815 count -= cluster_size;
3816 }
3817
3818 return released_blocks;
3819 }
3820
3821 static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
3822 {
3823 struct inode *inode = file_inode(filp);
3824 struct f2fs_inode_info *fi = F2FS_I(inode);
3825 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3826 pgoff_t page_idx = 0, last_idx;
3827 unsigned int released_blocks = 0;
3828 int ret;
3829 int writecount;
3830
3831 if (!f2fs_sb_has_compression(sbi))
3832 return -EOPNOTSUPP;
3833
3834 if (f2fs_readonly(sbi->sb))
3835 return -EROFS;
3836
3837 ret = mnt_want_write_file(filp);
3838 if (ret)
3839 return ret;
3840
3841 f2fs_balance_fs(sbi, true);
3842
3843 inode_lock(inode);
3844
3845 writecount = atomic_read(&inode->i_writecount);
3846 if ((filp->f_mode & FMODE_WRITE && writecount != 1) ||
3847 (!(filp->f_mode & FMODE_WRITE) && writecount)) {
3848 ret = -EBUSY;
3849 goto out;
3850 }
3851
3852 if (!f2fs_compressed_file(inode) ||
3853 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
3854 ret = -EINVAL;
3855 goto out;
3856 }
3857
3858 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
3859 if (ret)
3860 goto out;
3861
3862 if (!atomic_read(&fi->i_compr_blocks)) {
3863 ret = -EPERM;
3864 goto out;
3865 }
3866
3867 set_inode_flag(inode, FI_COMPRESS_RELEASED);
3868 inode_set_ctime_current(inode);
3869 f2fs_mark_inode_dirty_sync(inode, true);
3870
3871 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
3872 filemap_invalidate_lock(inode->i_mapping);
3873
3874 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
3875
3876 while (page_idx < last_idx) {
3877 struct dnode_of_data dn;
3878 pgoff_t end_offset, count;
3879
3880 f2fs_lock_op(sbi);
3881
3882 set_new_dnode(&dn, inode, NULL, NULL, 0);
3883 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
3884 if (ret) {
3885 f2fs_unlock_op(sbi);
3886 if (ret == -ENOENT) {
3887 page_idx = f2fs_get_next_page_offset(&dn,
3888 page_idx);
3889 ret = 0;
3890 continue;
3891 }
3892 break;
3893 }
3894
3895 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
3896 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
3897 count = round_up(count, fi->i_cluster_size);
3898
3899 ret = release_compress_blocks(&dn, count);
3900
3901 f2fs_put_dnode(&dn);
3902
3903 f2fs_unlock_op(sbi);
3904
3905 if (ret < 0)
3906 break;
3907
3908 page_idx += count;
3909 released_blocks += ret;
3910 }
3911
3912 filemap_invalidate_unlock(inode->i_mapping);
3913 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
3914 out:
3915 if (released_blocks)
3916 f2fs_update_time(sbi, REQ_TIME);
3917 inode_unlock(inode);
3918
3919 mnt_drop_write_file(filp);
3920
3921 if (ret >= 0) {
3922 ret = put_user(released_blocks, (u64 __user *)arg);
3923 } else if (released_blocks &&
3924 atomic_read(&fi->i_compr_blocks)) {
3925 set_sbi_flag(sbi, SBI_NEED_FSCK);
3926 f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
3927 "iblocks=%llu, released=%u, compr_blocks=%u, "
3928 "run fsck to fix.",
3929 __func__, inode->i_ino, inode->i_blocks,
3930 released_blocks,
3931 atomic_read(&fi->i_compr_blocks));
3932 }
3933
3934 return ret;
3935 }
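
/*
 * Illustrative sketch only (not part of this file): releasing the space
 * saved by compression back to the filesystem, and reserving it again
 * before the file is made writable. The fd is a placeholder.
 *
 *	__u64 released, reserved;
 *
 *	ioctl(fd, F2FS_IOC_RELEASE_COMPRESS_BLOCKS, &released);
 *	// ... later, before modifying the file again ...
 *	ioctl(fd, F2FS_IOC_RESERVE_COMPRESS_BLOCKS, &reserved);
 */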
3936
3937 static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count,
3938 unsigned int *reserved_blocks)
3939 {
3940 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
3941 int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
3942 block_t blkaddr;
3943 int i;
3944
3945 for (i = 0; i < count; i++) {
3946 blkaddr = data_blkaddr(dn->inode, dn->node_folio,
3947 dn->ofs_in_node + i);
3948
3949 if (!__is_valid_data_blkaddr(blkaddr))
3950 continue;
3951 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
3952 DATA_GENERIC_ENHANCE)))
3953 return -EFSCORRUPTED;
3954 }
3955
3956 while (count) {
3957 int compr_blocks = 0;
3958 blkcnt_t reserved = 0;
3959 blkcnt_t to_reserved;
3960 int ret;
3961
3962 for (i = 0; i < cluster_size; i++) {
3963 blkaddr = data_blkaddr(dn->inode, dn->node_folio,
3964 dn->ofs_in_node + i);
3965
3966 if (i == 0) {
3967 if (blkaddr != COMPRESS_ADDR) {
3968 dn->ofs_in_node += cluster_size;
3969 goto next;
3970 }
3971 continue;
3972 }
3973
			/*
			 * The compressed cluster was not released because it
			 * failed in release_compress_blocks(), so NEW_ADDR is
			 * a possible case.
			 */
3979 if (blkaddr == NEW_ADDR) {
3980 reserved++;
3981 continue;
3982 }
3983 if (__is_valid_data_blkaddr(blkaddr)) {
3984 compr_blocks++;
3985 continue;
3986 }
3987 }
3988
3989 to_reserved = cluster_size - compr_blocks - reserved;
3990
		/* for the case where all blocks in the cluster were reserved */
3992 if (reserved && to_reserved == 1) {
3993 dn->ofs_in_node += cluster_size;
3994 goto next;
3995 }
3996
3997 ret = inc_valid_block_count(sbi, dn->inode,
3998 &to_reserved, false);
3999 if (unlikely(ret))
4000 return ret;
4001
4002 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
4003 if (f2fs_data_blkaddr(dn) == NULL_ADDR)
4004 f2fs_set_data_blkaddr(dn, NEW_ADDR);
4005 }
4006
4007 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
4008
4009 *reserved_blocks += to_reserved;
4010 next:
4011 count -= cluster_size;
4012 }
4013
4014 return 0;
4015 }
4016
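/*
 * F2FS_IOC_RESERVE_COMPRESS_BLOCKS handler: the inverse of the release
 * ioctl.  It re-reserves space for a compressed file whose saved blocks
 * were released (FI_COMPRESS_RELEASED must be set), clears the flag on
 * success, and returns the number of reserved blocks through the u64
 * pointed to by @arg.
 */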
static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
4018 {
4019 struct inode *inode = file_inode(filp);
4020 struct f2fs_inode_info *fi = F2FS_I(inode);
4021 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4022 pgoff_t page_idx = 0, last_idx;
4023 unsigned int reserved_blocks = 0;
4024 int ret;
4025
4026 if (!f2fs_sb_has_compression(sbi))
4027 return -EOPNOTSUPP;
4028
4029 if (f2fs_readonly(sbi->sb))
4030 return -EROFS;
4031
4032 ret = mnt_want_write_file(filp);
4033 if (ret)
4034 return ret;
4035
4036 f2fs_balance_fs(sbi, true);
4037
4038 inode_lock(inode);
4039
4040 if (!f2fs_compressed_file(inode) ||
4041 !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
4042 ret = -EINVAL;
4043 goto unlock_inode;
4044 }
4045
4046 if (atomic_read(&fi->i_compr_blocks))
4047 goto unlock_inode;
4048
4049 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
4050 filemap_invalidate_lock(inode->i_mapping);
4051
4052 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
4053
4054 while (page_idx < last_idx) {
4055 struct dnode_of_data dn;
4056 pgoff_t end_offset, count;
4057
4058 f2fs_lock_op(sbi);
4059
4060 set_new_dnode(&dn, inode, NULL, NULL, 0);
4061 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
4062 if (ret) {
4063 f2fs_unlock_op(sbi);
4064 if (ret == -ENOENT) {
4065 page_idx = f2fs_get_next_page_offset(&dn,
4066 page_idx);
4067 ret = 0;
4068 continue;
4069 }
4070 break;
4071 }
4072
4073 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
4074 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
4075 count = round_up(count, fi->i_cluster_size);
4076
4077 ret = reserve_compress_blocks(&dn, count, &reserved_blocks);
4078
4079 f2fs_put_dnode(&dn);
4080
4081 f2fs_unlock_op(sbi);
4082
4083 if (ret < 0)
4084 break;
4085
4086 page_idx += count;
4087 }
4088
4089 filemap_invalidate_unlock(inode->i_mapping);
4090 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
4091
4092 if (!ret) {
4093 clear_inode_flag(inode, FI_COMPRESS_RELEASED);
4094 inode_set_ctime_current(inode);
4095 f2fs_mark_inode_dirty_sync(inode, true);
4096 }
4097 unlock_inode:
4098 if (reserved_blocks)
4099 f2fs_update_time(sbi, REQ_TIME);
4100 inode_unlock(inode);
4101 mnt_drop_write_file(filp);
4102
4103 if (!ret) {
4104 ret = put_user(reserved_blocks, (u64 __user *)arg);
4105 } else if (reserved_blocks &&
4106 atomic_read(&fi->i_compr_blocks)) {
4107 set_sbi_flag(sbi, SBI_NEED_FSCK);
4108 f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx "
4109 "iblocks=%llu, reserved=%u, compr_blocks=%u, "
4110 "run fsck to fix.",
4111 __func__, inode->i_ino, inode->i_blocks,
4112 reserved_blocks,
4113 atomic_read(&fi->i_compr_blocks));
4114 }
4115
4116 return ret;
4117 }
4118
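/*
 * Erase @len blocks starting at @block on @bdev according to @flags:
 * F2FS_TRIM_FILE_DISCARD issues a secure erase when the device supports it
 * and falls back to a regular discard otherwise, while
 * F2FS_TRIM_FILE_ZEROOUT zeroes the range (through fscrypt for encrypted
 * inodes).
 */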
static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
		pgoff_t off, block_t block, block_t len, u32 flags)
4121 {
4122 sector_t sector = SECTOR_FROM_BLOCK(block);
4123 sector_t nr_sects = SECTOR_FROM_BLOCK(len);
4124 int ret = 0;
4125
4126 if (flags & F2FS_TRIM_FILE_DISCARD) {
4127 if (bdev_max_secure_erase_sectors(bdev))
4128 ret = blkdev_issue_secure_erase(bdev, sector, nr_sects,
4129 GFP_NOFS);
4130 else
4131 ret = blkdev_issue_discard(bdev, sector, nr_sects,
4132 GFP_NOFS);
4133 }
4134
4135 if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) {
4136 if (IS_ENCRYPTED(inode))
4137 ret = fscrypt_zeroout_range(inode, off, block, len);
4138 else
4139 ret = blkdev_issue_zeroout(bdev, sector, nr_sects,
4140 GFP_NOFS, 0);
4141 }
4142
4143 return ret;
4144 }
4145
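/*
 * F2FS_IOC_SEC_TRIM_FILE handler: securely erase the on-disk blocks backing
 * the byte range described by the struct f2fs_sectrim_range argument
 * (len == -1 means up to the end of the file).  The range is written back
 * and dropped from the page cache, then physically contiguous extents
 * (merged per device) are passed to f2fs_secure_erase().  An illustrative
 * userspace call:
 *
 *	struct f2fs_sectrim_range range = {
 *		.start = 0,
 *		.len = (u64)-1,
 *		.flags = F2FS_TRIM_FILE_DISCARD | F2FS_TRIM_FILE_ZEROOUT,
 *	};
 *	ioctl(fd, F2FS_IOC_SEC_TRIM_FILE, &range);
 */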
static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
4147 {
4148 struct inode *inode = file_inode(filp);
4149 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4150 struct address_space *mapping = inode->i_mapping;
4151 struct block_device *prev_bdev = NULL;
4152 struct f2fs_sectrim_range range;
4153 pgoff_t index, pg_end, prev_index = 0;
4154 block_t prev_block = 0, len = 0;
4155 loff_t end_addr;
4156 bool to_end = false;
4157 int ret = 0;
4158
4159 if (!(filp->f_mode & FMODE_WRITE))
4160 return -EBADF;
4161
4162 if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg,
4163 sizeof(range)))
4164 return -EFAULT;
4165
4166 if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) ||
4167 !S_ISREG(inode->i_mode))
4168 return -EINVAL;
4169
4170 if (((range.flags & F2FS_TRIM_FILE_DISCARD) &&
4171 !f2fs_hw_support_discard(sbi)) ||
4172 ((range.flags & F2FS_TRIM_FILE_ZEROOUT) &&
4173 IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi)))
4174 return -EOPNOTSUPP;
4175
4176 ret = mnt_want_write_file(filp);
4177 if (ret)
4178 return ret;
4179 inode_lock(inode);
4180
4181 if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) ||
4182 range.start >= inode->i_size) {
4183 ret = -EINVAL;
4184 goto err;
4185 }
4186
4187 if (range.len == 0)
4188 goto err;
4189
4190 if (inode->i_size - range.start > range.len) {
4191 end_addr = range.start + range.len;
4192 } else {
4193 end_addr = range.len == (u64)-1 ?
4194 sbi->sb->s_maxbytes : inode->i_size;
4195 to_end = true;
4196 }
4197
4198 if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) ||
4199 (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) {
4200 ret = -EINVAL;
4201 goto err;
4202 }
4203
4204 index = F2FS_BYTES_TO_BLK(range.start);
4205 pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE);
4206
4207 ret = f2fs_convert_inline_inode(inode);
4208 if (ret)
4209 goto err;
4210
4211 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
4212 filemap_invalidate_lock(mapping);
4213
4214 ret = filemap_write_and_wait_range(mapping, range.start,
4215 to_end ? LLONG_MAX : end_addr - 1);
4216 if (ret)
4217 goto out;
4218
4219 truncate_inode_pages_range(mapping, range.start,
4220 to_end ? -1 : end_addr - 1);
4221
4222 while (index < pg_end) {
4223 struct dnode_of_data dn;
4224 pgoff_t end_offset, count;
4225 int i;
4226
4227 set_new_dnode(&dn, inode, NULL, NULL, 0);
4228 ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
4229 if (ret) {
4230 if (ret == -ENOENT) {
4231 index = f2fs_get_next_page_offset(&dn, index);
4232 continue;
4233 }
4234 goto out;
4235 }
4236
4237 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
4238 count = min(end_offset - dn.ofs_in_node, pg_end - index);
4239 for (i = 0; i < count; i++, index++, dn.ofs_in_node++) {
4240 struct block_device *cur_bdev;
4241 block_t blkaddr = f2fs_data_blkaddr(&dn);
4242
4243 if (!__is_valid_data_blkaddr(blkaddr))
4244 continue;
4245
4246 if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
4247 DATA_GENERIC_ENHANCE)) {
4248 ret = -EFSCORRUPTED;
4249 f2fs_put_dnode(&dn);
4250 goto out;
4251 }
4252
4253 cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
4254 if (f2fs_is_multi_device(sbi)) {
4255 int di = f2fs_target_device_index(sbi, blkaddr);
4256
4257 blkaddr -= FDEV(di).start_blk;
4258 }
4259
4260 if (len) {
4261 if (prev_bdev == cur_bdev &&
4262 index == prev_index + len &&
4263 blkaddr == prev_block + len) {
4264 len++;
4265 } else {
4266 ret = f2fs_secure_erase(prev_bdev,
4267 inode, prev_index, prev_block,
4268 len, range.flags);
4269 if (ret) {
4270 f2fs_put_dnode(&dn);
4271 goto out;
4272 }
4273
4274 len = 0;
4275 }
4276 }
4277
4278 if (!len) {
4279 prev_bdev = cur_bdev;
4280 prev_index = index;
4281 prev_block = blkaddr;
4282 len = 1;
4283 }
4284 }
4285
4286 f2fs_put_dnode(&dn);
4287
4288 if (fatal_signal_pending(current)) {
4289 ret = -EINTR;
4290 goto out;
4291 }
4292 cond_resched();
4293 }
4294
4295 if (len)
4296 ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
4297 prev_block, len, range.flags);
4298 f2fs_update_time(sbi, REQ_TIME);
4299 out:
4300 filemap_invalidate_unlock(mapping);
4301 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
4302 err:
4303 inode_unlock(inode);
4304 mnt_drop_write_file(filp);
4305
4306 return ret;
4307 }
4308
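/*
 * F2FS_IOC_GET_COMPRESS_OPTION handler: copy the inode's current compression
 * algorithm and log2 cluster size to userspace as a struct f2fs_comp_option.
 */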
static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg)
4310 {
4311 struct inode *inode = file_inode(filp);
4312 struct f2fs_comp_option option;
4313
4314 if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
4315 return -EOPNOTSUPP;
4316
4317 inode_lock_shared(inode);
4318
4319 if (!f2fs_compressed_file(inode)) {
4320 inode_unlock_shared(inode);
4321 return -ENODATA;
4322 }
4323
4324 option.algorithm = F2FS_I(inode)->i_compress_algorithm;
4325 option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
4326
4327 inode_unlock_shared(inode);
4328
4329 if (copy_to_user((struct f2fs_comp_option __user *)arg, &option,
4330 sizeof(option)))
4331 return -EFAULT;
4332
4333 return 0;
4334 }
4335
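/*
 * F2FS_IOC_SET_COMPRESS_OPTION handler: update the per-inode compression
 * algorithm and cluster size.  Only allowed on a compressed file that is not
 * mmapped, has no dirty pages, and has no blocks allocated yet.
 */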
static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
4337 {
4338 struct inode *inode = file_inode(filp);
4339 struct f2fs_inode_info *fi = F2FS_I(inode);
4340 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4341 struct f2fs_comp_option option;
4342 int ret = 0;
4343
4344 if (!f2fs_sb_has_compression(sbi))
4345 return -EOPNOTSUPP;
4346
4347 if (!(filp->f_mode & FMODE_WRITE))
4348 return -EBADF;
4349
4350 if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg,
4351 sizeof(option)))
4352 return -EFAULT;
4353
4354 if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
4355 option.log_cluster_size > MAX_COMPRESS_LOG_SIZE ||
4356 option.algorithm >= COMPRESS_MAX)
4357 return -EINVAL;
4358
4359 ret = mnt_want_write_file(filp);
4360 if (ret)
4361 return ret;
4362 inode_lock(inode);
4363
4364 f2fs_down_write(&F2FS_I(inode)->i_sem);
4365 if (!f2fs_compressed_file(inode)) {
4366 ret = -EINVAL;
4367 goto out;
4368 }
4369
4370 if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) {
4371 ret = -EBUSY;
4372 goto out;
4373 }
4374
4375 if (F2FS_HAS_BLOCKS(inode)) {
4376 ret = -EFBIG;
4377 goto out;
4378 }
4379
4380 fi->i_compress_algorithm = option.algorithm;
4381 fi->i_log_cluster_size = option.log_cluster_size;
4382 fi->i_cluster_size = BIT(option.log_cluster_size);
4383 /* Set default level */
4384 if (fi->i_compress_algorithm == COMPRESS_ZSTD)
4385 fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
4386 else
4387 fi->i_compress_level = 0;
4388 /* Adjust mount option level */
4389 if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm &&
4390 F2FS_OPTION(sbi).compress_level)
4391 fi->i_compress_level = F2FS_OPTION(sbi).compress_level;
4392 f2fs_mark_inode_dirty_sync(inode, true);
4393
4394 if (!f2fs_is_compress_backend_ready(inode))
4395 f2fs_warn(sbi, "compression algorithm is successfully set, "
4396 "but current kernel doesn't support this algorithm.");
4397 out:
4398 f2fs_up_write(&fi->i_sem);
4399 inode_unlock(inode);
4400 mnt_drop_write_file(filp);
4401
4402 return ret;
4403 }
4404
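/*
 * Read @len pages starting at @page_idx into the page cache and mark them
 * dirty (and flagged for GC-style writeback), so that a following writeback
 * pass rewrites those blocks.  Used by the compress/decompress ioctls to push
 * clusters through the write path again.
 */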
static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
4406 {
4407 DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx);
4408 struct address_space *mapping = inode->i_mapping;
4409 struct folio *folio;
4410 pgoff_t redirty_idx = page_idx;
4411 int page_len = 0, ret = 0;
4412
4413 page_cache_ra_unbounded(&ractl, len, 0);
4414
4415 do {
4416 folio = read_cache_folio(mapping, page_idx, NULL, NULL);
4417 if (IS_ERR(folio)) {
4418 ret = PTR_ERR(folio);
4419 break;
4420 }
4421 page_len += folio_nr_pages(folio) - (page_idx - folio->index);
4422 page_idx = folio_next_index(folio);
4423 } while (page_len < len);
4424
4425 do {
4426 folio = filemap_lock_folio(mapping, redirty_idx);
4427
		/* It will never fail, since the folio was pinned above */
4429 f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(folio));
4430
4431 f2fs_folio_wait_writeback(folio, DATA, true, true);
4432
4433 folio_mark_dirty(folio);
4434 folio_set_f2fs_gcing(folio);
4435 redirty_idx = folio_next_index(folio);
4436 folio_unlock(folio);
4437 folio_put_refs(folio, 2);
4438 } while (redirty_idx < page_idx);
4439
4440 return ret;
4441 }
4442
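/*
 * F2FS_IOC_DECOMPRESS_FILE handler (COMPR_MODE_USER only): redirty every
 * compressed cluster and write it back so the data ends up stored
 * uncompressed.  On failure the file may be left partially decompressed.
 */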
static int f2fs_ioc_decompress_file(struct file *filp)
4444 {
4445 struct inode *inode = file_inode(filp);
4446 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4447 struct f2fs_inode_info *fi = F2FS_I(inode);
4448 pgoff_t page_idx = 0, last_idx, cluster_idx;
4449 int ret;
4450
4451 if (!f2fs_sb_has_compression(sbi) ||
4452 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
4453 return -EOPNOTSUPP;
4454
4455 if (!(filp->f_mode & FMODE_WRITE))
4456 return -EBADF;
4457
4458 f2fs_balance_fs(sbi, true);
4459
4460 ret = mnt_want_write_file(filp);
4461 if (ret)
4462 return ret;
4463 inode_lock(inode);
4464
4465 if (!f2fs_is_compress_backend_ready(inode)) {
4466 ret = -EOPNOTSUPP;
4467 goto out;
4468 }
4469
4470 if (!f2fs_compressed_file(inode) ||
4471 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
4472 ret = -EINVAL;
4473 goto out;
4474 }
4475
4476 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
4477 if (ret)
4478 goto out;
4479
4480 if (!atomic_read(&fi->i_compr_blocks))
4481 goto out;
4482
4483 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
4484 last_idx >>= fi->i_log_cluster_size;
4485
4486 for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) {
4487 page_idx = cluster_idx << fi->i_log_cluster_size;
4488
4489 if (!f2fs_is_compressed_cluster(inode, page_idx))
4490 continue;
4491
4492 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size);
4493 if (ret < 0)
4494 break;
4495
4496 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
4497 ret = filemap_fdatawrite(inode->i_mapping);
4498 if (ret < 0)
4499 break;
4500 }
4501
4502 cond_resched();
4503 if (fatal_signal_pending(current)) {
4504 ret = -EINTR;
4505 break;
4506 }
4507 }
4508
4509 if (!ret)
4510 ret = filemap_write_and_wait_range(inode->i_mapping, 0,
4511 LLONG_MAX);
4512
4513 if (ret)
4514 f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.",
4515 __func__, ret);
4516 f2fs_update_time(sbi, REQ_TIME);
4517 out:
4518 inode_unlock(inode);
4519 mnt_drop_write_file(filp);
4520
4521 return ret;
4522 }
4523
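/*
 * F2FS_IOC_COMPRESS_FILE handler (COMPR_MODE_USER only): with
 * FI_ENABLE_COMPRESS set, redirty every non-sparse cluster and write it back
 * so the data is rewritten in compressed form.
 */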
static int f2fs_ioc_compress_file(struct file *filp)
4525 {
4526 struct inode *inode = file_inode(filp);
4527 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4528 struct f2fs_inode_info *fi = F2FS_I(inode);
4529 pgoff_t page_idx = 0, last_idx, cluster_idx;
4530 int ret;
4531
4532 if (!f2fs_sb_has_compression(sbi) ||
4533 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
4534 return -EOPNOTSUPP;
4535
4536 if (!(filp->f_mode & FMODE_WRITE))
4537 return -EBADF;
4538
4539 f2fs_balance_fs(sbi, true);
4540
4541 ret = mnt_want_write_file(filp);
4542 if (ret)
4543 return ret;
4544 inode_lock(inode);
4545
4546 if (!f2fs_is_compress_backend_ready(inode)) {
4547 ret = -EOPNOTSUPP;
4548 goto out;
4549 }
4550
4551 if (!f2fs_compressed_file(inode) ||
4552 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
4553 ret = -EINVAL;
4554 goto out;
4555 }
4556
4557 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
4558 if (ret)
4559 goto out;
4560
4561 set_inode_flag(inode, FI_ENABLE_COMPRESS);
4562
4563 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
4564 last_idx >>= fi->i_log_cluster_size;
4565
4566 for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) {
4567 page_idx = cluster_idx << fi->i_log_cluster_size;
4568
4569 if (f2fs_is_sparse_cluster(inode, page_idx))
4570 continue;
4571
4572 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size);
4573 if (ret < 0)
4574 break;
4575
4576 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
4577 ret = filemap_fdatawrite(inode->i_mapping);
4578 if (ret < 0)
4579 break;
4580 }
4581
4582 cond_resched();
4583 if (fatal_signal_pending(current)) {
4584 ret = -EINTR;
4585 break;
4586 }
4587 }
4588
4589 if (!ret)
4590 ret = filemap_write_and_wait_range(inode->i_mapping, 0,
4591 LLONG_MAX);
4592
4593 clear_inode_flag(inode, FI_ENABLE_COMPRESS);
4594
4595 if (ret)
4596 f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.",
4597 __func__, ret);
4598 f2fs_update_time(sbi, REQ_TIME);
4599 out:
4600 inode_unlock(inode);
4601 mnt_drop_write_file(filp);
4602
4603 return ret;
4604 }
4605
static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
4607 {
4608 switch (cmd) {
4609 case FS_IOC_GETVERSION:
4610 return f2fs_ioc_getversion(filp, arg);
4611 case F2FS_IOC_START_ATOMIC_WRITE:
4612 return f2fs_ioc_start_atomic_write(filp, false);
4613 case F2FS_IOC_START_ATOMIC_REPLACE:
4614 return f2fs_ioc_start_atomic_write(filp, true);
4615 case F2FS_IOC_COMMIT_ATOMIC_WRITE:
4616 return f2fs_ioc_commit_atomic_write(filp);
4617 case F2FS_IOC_ABORT_ATOMIC_WRITE:
4618 return f2fs_ioc_abort_atomic_write(filp);
4619 case F2FS_IOC_START_VOLATILE_WRITE:
4620 case F2FS_IOC_RELEASE_VOLATILE_WRITE:
4621 return -EOPNOTSUPP;
4622 case F2FS_IOC_SHUTDOWN:
4623 return f2fs_ioc_shutdown(filp, arg);
4624 case FITRIM:
4625 return f2fs_ioc_fitrim(filp, arg);
4626 case FS_IOC_SET_ENCRYPTION_POLICY:
4627 return f2fs_ioc_set_encryption_policy(filp, arg);
4628 case FS_IOC_GET_ENCRYPTION_POLICY:
4629 return f2fs_ioc_get_encryption_policy(filp, arg);
4630 case FS_IOC_GET_ENCRYPTION_PWSALT:
4631 return f2fs_ioc_get_encryption_pwsalt(filp, arg);
4632 case FS_IOC_GET_ENCRYPTION_POLICY_EX:
4633 return f2fs_ioc_get_encryption_policy_ex(filp, arg);
4634 case FS_IOC_ADD_ENCRYPTION_KEY:
4635 return f2fs_ioc_add_encryption_key(filp, arg);
4636 case FS_IOC_REMOVE_ENCRYPTION_KEY:
4637 return f2fs_ioc_remove_encryption_key(filp, arg);
4638 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
4639 return f2fs_ioc_remove_encryption_key_all_users(filp, arg);
4640 case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
4641 return f2fs_ioc_get_encryption_key_status(filp, arg);
4642 case FS_IOC_GET_ENCRYPTION_NONCE:
4643 return f2fs_ioc_get_encryption_nonce(filp, arg);
4644 case F2FS_IOC_GARBAGE_COLLECT:
4645 return f2fs_ioc_gc(filp, arg);
4646 case F2FS_IOC_GARBAGE_COLLECT_RANGE:
4647 return f2fs_ioc_gc_range(filp, arg);
4648 case F2FS_IOC_WRITE_CHECKPOINT:
4649 return f2fs_ioc_write_checkpoint(filp);
4650 case F2FS_IOC_DEFRAGMENT:
4651 return f2fs_ioc_defragment(filp, arg);
4652 case F2FS_IOC_MOVE_RANGE:
4653 return f2fs_ioc_move_range(filp, arg);
4654 case F2FS_IOC_FLUSH_DEVICE:
4655 return f2fs_ioc_flush_device(filp, arg);
4656 case F2FS_IOC_GET_FEATURES:
4657 return f2fs_ioc_get_features(filp, arg);
4658 case F2FS_IOC_GET_PIN_FILE:
4659 return f2fs_ioc_get_pin_file(filp, arg);
4660 case F2FS_IOC_SET_PIN_FILE:
4661 return f2fs_ioc_set_pin_file(filp, arg);
4662 case F2FS_IOC_PRECACHE_EXTENTS:
4663 return f2fs_ioc_precache_extents(filp);
4664 case F2FS_IOC_RESIZE_FS:
4665 return f2fs_ioc_resize_fs(filp, arg);
4666 case FS_IOC_ENABLE_VERITY:
4667 return f2fs_ioc_enable_verity(filp, arg);
4668 case FS_IOC_MEASURE_VERITY:
4669 return f2fs_ioc_measure_verity(filp, arg);
4670 case FS_IOC_READ_VERITY_METADATA:
4671 return f2fs_ioc_read_verity_metadata(filp, arg);
4672 case FS_IOC_GETFSLABEL:
4673 return f2fs_ioc_getfslabel(filp, arg);
4674 case FS_IOC_SETFSLABEL:
4675 return f2fs_ioc_setfslabel(filp, arg);
4676 case F2FS_IOC_GET_COMPRESS_BLOCKS:
4677 return f2fs_ioc_get_compress_blocks(filp, arg);
4678 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
4679 return f2fs_release_compress_blocks(filp, arg);
4680 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
4681 return f2fs_reserve_compress_blocks(filp, arg);
4682 case F2FS_IOC_SEC_TRIM_FILE:
4683 return f2fs_sec_trim_file(filp, arg);
4684 case F2FS_IOC_GET_COMPRESS_OPTION:
4685 return f2fs_ioc_get_compress_option(filp, arg);
4686 case F2FS_IOC_SET_COMPRESS_OPTION:
4687 return f2fs_ioc_set_compress_option(filp, arg);
4688 case F2FS_IOC_DECOMPRESS_FILE:
4689 return f2fs_ioc_decompress_file(filp);
4690 case F2FS_IOC_COMPRESS_FILE:
4691 return f2fs_ioc_compress_file(filp);
4692 case F2FS_IOC_GET_DEV_ALIAS_FILE:
4693 return f2fs_ioc_get_dev_alias_file(filp, arg);
4694 case F2FS_IOC_IO_PRIO:
4695 return f2fs_ioc_io_prio(filp, arg);
4696 default:
4697 return -ENOTTY;
4698 }
4699 }
4700
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
4702 {
4703 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
4704 return -EIO;
4705 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
4706 return -ENOSPC;
4707
4708 return __f2fs_ioctl(filp, cmd, arg);
4709 }
4710
4711 /*
4712 * Return %true if the given read or write request should use direct I/O, or
4713 * %false if it should use buffered I/O.
4714 */
static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
				struct iov_iter *iter)
4717 {
4718 unsigned int align;
4719
4720 if (!(iocb->ki_flags & IOCB_DIRECT))
4721 return false;
4722
4723 if (f2fs_force_buffered_io(inode, iov_iter_rw(iter)))
4724 return false;
4725
4726 /*
4727 * Direct I/O not aligned to the disk's logical_block_size will be
4728 * attempted, but will fail with -EINVAL.
4729 *
4730 * f2fs additionally requires that direct I/O be aligned to the
4731 * filesystem block size, which is often a stricter requirement.
4732 * However, f2fs traditionally falls back to buffered I/O on requests
4733 * that are logical_block_size-aligned but not fs-block aligned.
4734 *
4735 * The below logic implements this behavior.
4736 */
4737 align = iocb->ki_pos | iov_iter_alignment(iter);
4738 if (!IS_ALIGNED(align, i_blocksize(inode)) &&
4739 IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
4740 return false;
4741
4742 return true;
4743 }
4744
static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
				unsigned int flags)
4747 {
4748 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
4749
4750 dec_page_count(sbi, F2FS_DIO_READ);
4751 if (error)
4752 return error;
4753 f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size);
4754 return 0;
4755 }
4756
4757 static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
4758 .end_io = f2fs_dio_read_end_io,
4759 };
4760
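/*
 * Direct read path: takes i_gc_rwsem[READ] (trylock only for IOCB_NOWAIT),
 * keeps the F2FS_DIO_READ counter elevated across the iomap call, and
 * accounts the completed bytes in iostat.
 */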
static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
4762 {
4763 struct file *file = iocb->ki_filp;
4764 struct inode *inode = file_inode(file);
4765 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4766 struct f2fs_inode_info *fi = F2FS_I(inode);
4767 const loff_t pos = iocb->ki_pos;
4768 const size_t count = iov_iter_count(to);
4769 struct iomap_dio *dio;
4770 ssize_t ret;
4771
4772 if (count == 0)
4773 return 0; /* skip atime update */
4774
4775 trace_f2fs_direct_IO_enter(inode, iocb, count, READ);
4776
4777 if (iocb->ki_flags & IOCB_NOWAIT) {
4778 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
4779 ret = -EAGAIN;
4780 goto out;
4781 }
4782 } else {
4783 f2fs_down_read(&fi->i_gc_rwsem[READ]);
4784 }
4785
4786 /* dio is not compatible w/ atomic file */
4787 if (f2fs_is_atomic_file(inode)) {
4788 f2fs_up_read(&fi->i_gc_rwsem[READ]);
4789 ret = -EOPNOTSUPP;
4790 goto out;
4791 }
4792
4793 /*
4794 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
4795 * the higher-level function iomap_dio_rw() in order to ensure that the
4796 * F2FS_DIO_READ counter will be decremented correctly in all cases.
4797 */
4798 inc_page_count(sbi, F2FS_DIO_READ);
4799 dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
4800 &f2fs_iomap_dio_read_ops, 0, NULL, 0);
4801 if (IS_ERR_OR_NULL(dio)) {
4802 ret = PTR_ERR_OR_ZERO(dio);
4803 if (ret != -EIOCBQUEUED)
4804 dec_page_count(sbi, F2FS_DIO_READ);
4805 } else {
4806 ret = iomap_dio_complete(dio);
4807 }
4808
4809 f2fs_up_read(&fi->i_gc_rwsem[READ]);
4810
4811 file_accessed(file);
4812 out:
4813 trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
4814 return ret;
4815 }
4816
static void f2fs_trace_rw_file_path(struct file *file, loff_t pos, size_t count,
					int rw)
4819 {
4820 struct inode *inode = file_inode(file);
4821 char *buf, *path;
4822
4823 buf = f2fs_getname(F2FS_I_SB(inode));
4824 if (!buf)
4825 return;
4826 path = dentry_path_raw(file_dentry(file), buf, PATH_MAX);
4827 if (IS_ERR(path))
4828 goto free_buf;
4829 if (rw == WRITE)
4830 trace_f2fs_datawrite_start(inode, pos, count,
4831 current->pid, path, current->comm);
4832 else
4833 trace_f2fs_dataread_start(inode, pos, count,
4834 current->pid, path, current->comm);
4835 free_buf:
4836 f2fs_putname(buf);
4837 }
4838
static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
4840 {
4841 struct inode *inode = file_inode(iocb->ki_filp);
4842 const loff_t pos = iocb->ki_pos;
4843 ssize_t ret;
4844 bool dio;
4845
4846 if (!f2fs_is_compress_backend_ready(inode))
4847 return -EOPNOTSUPP;
4848
4849 if (trace_f2fs_dataread_start_enabled())
4850 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
4851 iov_iter_count(to), READ);
4852
4853 dio = f2fs_should_use_dio(inode, iocb, to);
4854
	/* In LFS mode, if there is in-flight DIO, wait for its completion */
4856 if (f2fs_lfs_mode(F2FS_I_SB(inode)) &&
4857 get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) &&
4858 (!f2fs_is_pinned_file(inode) || !dio))
4859 inode_dio_wait(inode);
4860
4861 if (dio) {
4862 ret = f2fs_dio_read_iter(iocb, to);
4863 } else {
4864 ret = filemap_read(iocb, to, 0);
4865 if (ret > 0)
4866 f2fs_update_iostat(F2FS_I_SB(inode), inode,
4867 APP_BUFFERED_READ_IO, ret);
4868 }
4869 trace_f2fs_dataread_end(inode, pos, ret);
4870 return ret;
4871 }
4872
static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len, unsigned int flags)
4876 {
4877 struct inode *inode = file_inode(in);
4878 const loff_t pos = *ppos;
4879 ssize_t ret;
4880
4881 if (!f2fs_is_compress_backend_ready(inode))
4882 return -EOPNOTSUPP;
4883
4884 if (trace_f2fs_dataread_start_enabled())
4885 f2fs_trace_rw_file_path(in, pos, len, READ);
4886
4887 ret = filemap_splice_read(in, ppos, pipe, len, flags);
4888 if (ret > 0)
4889 f2fs_update_iostat(F2FS_I_SB(inode), inode,
4890 APP_BUFFERED_READ_IO, ret);
4891
4892 trace_f2fs_dataread_end(inode, pos, ret);
4893 return ret;
4894 }
4895
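/*
 * Common pre-write checks shared by the buffered and direct write paths:
 * reject immutable and released-compression inodes, run
 * generic_write_checks() and file_modified(), and drop any post-EOF page
 * cache via f2fs_zero_post_eof_page() before the write extends the file.
 * Returns the number of bytes to write, 0, or a negative errno.
 */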
static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
4897 {
4898 struct file *file = iocb->ki_filp;
4899 struct inode *inode = file_inode(file);
4900 ssize_t count;
4901 int err;
4902
4903 if (IS_IMMUTABLE(inode))
4904 return -EPERM;
4905
4906 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
4907 return -EPERM;
4908
4909 count = generic_write_checks(iocb, from);
4910 if (count <= 0)
4911 return count;
4912
4913 err = file_modified(file);
4914 if (err)
4915 return err;
4916
4917 filemap_invalidate_lock(inode->i_mapping);
4918 f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from));
4919 filemap_invalidate_unlock(inode->i_mapping);
4920 return count;
4921 }
4922
4923 /*
4924 * Preallocate blocks for a write request, if it is possible and helpful to do
4925 * so. Returns a positive number if blocks may have been preallocated, 0 if no
4926 * blocks were preallocated, or a negative errno value if something went
4927 * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the
4928 * requested blocks (not just some of them) have been allocated.
4929 */
static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
					bool dio)
4932 {
4933 struct inode *inode = file_inode(iocb->ki_filp);
4934 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4935 const loff_t pos = iocb->ki_pos;
4936 const size_t count = iov_iter_count(iter);
4937 struct f2fs_map_blocks map = {};
4938 int flag;
4939 int ret;
4940
4941 /* If it will be an out-of-place direct write, don't bother. */
4942 if (dio && f2fs_lfs_mode(sbi))
4943 return 0;
	/*
	 * Don't preallocate holes below EOF: per DIO_SKIP_HOLES semantics,
	 * direct I/O falls back to buffered I/O if it meets any holes.
	 */
4948 if (dio && i_size_read(inode) &&
4949 (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
4950 return 0;
4951
4952 /* No-wait I/O can't allocate blocks. */
4953 if (iocb->ki_flags & IOCB_NOWAIT)
4954 return 0;
4955
4956 /* If it will be a short write, don't bother. */
4957 if (fault_in_iov_iter_readable(iter, count))
4958 return 0;
4959
4960 if (f2fs_has_inline_data(inode)) {
4961 /* If the data will fit inline, don't bother. */
4962 if (pos + count <= MAX_INLINE_DATA(inode))
4963 return 0;
4964 ret = f2fs_convert_inline_inode(inode);
4965 if (ret)
4966 return ret;
4967 }
4968
4969 /* Do not preallocate blocks that will be written partially in 4KB. */
4970 map.m_lblk = F2FS_BLK_ALIGN(pos);
4971 map.m_len = F2FS_BYTES_TO_BLK(pos + count);
4972 if (map.m_len > map.m_lblk)
4973 map.m_len -= map.m_lblk;
4974 else
4975 return 0;
4976
4977 if (!IS_DEVICE_ALIASING(inode))
4978 map.m_may_create = true;
4979 if (dio) {
4980 map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi,
4981 inode->i_write_hint);
4982 flag = F2FS_GET_BLOCK_PRE_DIO;
4983 } else {
4984 map.m_seg_type = NO_CHECK_TYPE;
4985 flag = F2FS_GET_BLOCK_PRE_AIO;
4986 }
4987
4988 ret = f2fs_map_blocks(inode, &map, flag);
	/* -ENOSPC and -EDQUOT are fine; report the number of allocated blocks. */
4990 if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0))
4991 return ret;
4992 if (ret == 0)
4993 set_inode_flag(inode, FI_PREALLOCATED_ALL);
4994 return map.m_len;
4995 }
4996
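/*
 * Buffered write via generic_perform_write().  IOCB_NOWAIT is not supported
 * here; written bytes are accounted as APP_BUFFERED_IO.
 */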
static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
					struct iov_iter *from)
4999 {
5000 struct file *file = iocb->ki_filp;
5001 struct inode *inode = file_inode(file);
5002 ssize_t ret;
5003
5004 if (iocb->ki_flags & IOCB_NOWAIT)
5005 return -EOPNOTSUPP;
5006
5007 ret = generic_perform_write(iocb, from);
5008
5009 if (ret > 0) {
5010 f2fs_update_iostat(F2FS_I_SB(inode), inode,
5011 APP_BUFFERED_IO, ret);
5012 }
5013 return ret;
5014 }
5015
static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
					unsigned int flags)
5018 {
5019 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
5020
5021 dec_page_count(sbi, F2FS_DIO_WRITE);
5022 if (error)
5023 return error;
5024 f2fs_update_time(sbi, REQ_TIME);
5025 f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size);
5026 return 0;
5027 }
5028
static void f2fs_dio_write_submit_io(const struct iomap_iter *iter,
					struct bio *bio, loff_t file_offset)
5031 {
5032 struct inode *inode = iter->inode;
5033 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
5034 enum log_type type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint);
5035 enum temp_type temp = f2fs_get_segment_temp(sbi, type);
5036
5037 bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp);
5038 submit_bio(bio);
5039 }
5040
5041 static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
5042 .end_io = f2fs_dio_write_end_io,
5043 .submit_io = f2fs_dio_write_submit_io,
5044 };
5045
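/*
 * Write back and then drop the page cache in [start_pos, end_pos].  Used
 * after a buffered fallback of an O_DIRECT write so that direct I/O
 * semantics are preserved.
 */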
static void f2fs_flush_buffered_write(struct address_space *mapping,
					loff_t start_pos, loff_t end_pos)
5048 {
5049 int ret;
5050
5051 ret = filemap_write_and_wait_range(mapping, start_pos, end_pos);
5052 if (ret < 0)
5053 return;
5054 invalidate_mapping_pages(mapping,
5055 start_pos >> PAGE_SHIFT,
5056 end_pos >> PAGE_SHIFT);
5057 }
5058
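/*
 * Direct write path via iomap.  Takes i_gc_rwsem[WRITE] (plus [READ] in LFS
 * mode, where every DIO write is an out-of-place update), keeps the
 * F2FS_DIO_WRITE counter elevated across the iomap call, extends i_size on
 * success, and falls back to a buffered write for any remainder the direct
 * write could not complete, flushing and invalidating that range afterwards.
 */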
static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
					bool *may_need_sync)
5061 {
5062 struct file *file = iocb->ki_filp;
5063 struct inode *inode = file_inode(file);
5064 struct f2fs_inode_info *fi = F2FS_I(inode);
5065 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
5066 const bool do_opu = f2fs_lfs_mode(sbi);
5067 const loff_t pos = iocb->ki_pos;
5068 const ssize_t count = iov_iter_count(from);
5069 unsigned int dio_flags;
5070 struct iomap_dio *dio;
5071 ssize_t ret;
5072
5073 trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);
5074
5075 if (iocb->ki_flags & IOCB_NOWAIT) {
5076 /* f2fs_convert_inline_inode() and block allocation can block */
5077 if (f2fs_has_inline_data(inode) ||
5078 !f2fs_overwrite_io(inode, pos, count)) {
5079 ret = -EAGAIN;
5080 goto out;
5081 }
5082
5083 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
5084 ret = -EAGAIN;
5085 goto out;
5086 }
5087 if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
5088 f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
5089 ret = -EAGAIN;
5090 goto out;
5091 }
5092 } else {
5093 ret = f2fs_convert_inline_inode(inode);
5094 if (ret)
5095 goto out;
5096
5097 f2fs_down_read(&fi->i_gc_rwsem[WRITE]);
5098 if (do_opu)
5099 f2fs_down_read(&fi->i_gc_rwsem[READ]);
5100 }
5101
5102 /*
5103 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
5104 * the higher-level function iomap_dio_rw() in order to ensure that the
5105 * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
5106 */
5107 inc_page_count(sbi, F2FS_DIO_WRITE);
5108 dio_flags = 0;
5109 if (pos + count > inode->i_size)
5110 dio_flags |= IOMAP_DIO_FORCE_WAIT;
5111 dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
5112 &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
5113 if (IS_ERR_OR_NULL(dio)) {
5114 ret = PTR_ERR_OR_ZERO(dio);
5115 if (ret == -ENOTBLK)
5116 ret = 0;
5117 if (ret != -EIOCBQUEUED)
5118 dec_page_count(sbi, F2FS_DIO_WRITE);
5119 } else {
5120 ret = iomap_dio_complete(dio);
5121 }
5122
5123 if (do_opu)
5124 f2fs_up_read(&fi->i_gc_rwsem[READ]);
5125 f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
5126
5127 if (ret < 0)
5128 goto out;
5129 if (pos + ret > inode->i_size)
5130 f2fs_i_size_write(inode, pos + ret);
5131 if (!do_opu)
5132 set_inode_flag(inode, FI_UPDATE_WRITE);
5133
5134 if (iov_iter_count(from)) {
5135 ssize_t ret2;
5136 loff_t bufio_start_pos = iocb->ki_pos;
5137
5138 /*
5139 * The direct write was partial, so we need to fall back to a
5140 * buffered write for the remainder.
5141 */
5142
5143 ret2 = f2fs_buffered_write_iter(iocb, from);
5144 if (iov_iter_count(from))
5145 f2fs_write_failed(inode, iocb->ki_pos);
5146 if (ret2 < 0)
5147 goto out;
5148
5149 /*
5150 * Ensure that the pagecache pages are written to disk and
5151 * invalidated to preserve the expected O_DIRECT semantics.
5152 */
5153 if (ret2 > 0) {
5154 loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
5155
5156 ret += ret2;
5157
5158 f2fs_flush_buffered_write(file->f_mapping,
5159 bufio_start_pos,
5160 bufio_end_pos);
5161 }
5162 } else {
5163 /* iomap_dio_rw() already handled the generic_write_sync(). */
5164 *may_need_sync = false;
5165 }
5166 out:
5167 trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
5168 return ret;
5169 }
5170
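/*
 * Top-level ->write_iter(): run the common write checks, decide between
 * direct and buffered I/O, optionally preallocate blocks, do the write, trim
 * any preallocated blocks left beyond i_size, and flush/invalidate the page
 * cache when an O_DIRECT request was served by buffered I/O.
 */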
static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
5172 {
5173 struct inode *inode = file_inode(iocb->ki_filp);
5174 const loff_t orig_pos = iocb->ki_pos;
5175 const size_t orig_count = iov_iter_count(from);
5176 loff_t target_size;
5177 bool dio;
5178 bool may_need_sync = true;
5179 int preallocated;
5180 const loff_t pos = iocb->ki_pos;
5181 const ssize_t count = iov_iter_count(from);
5182 ssize_t ret;
5183
5184 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
5185 ret = -EIO;
5186 goto out;
5187 }
5188
5189 if (!f2fs_is_compress_backend_ready(inode)) {
5190 ret = -EOPNOTSUPP;
5191 goto out;
5192 }
5193
5194 if (iocb->ki_flags & IOCB_NOWAIT) {
5195 if (!inode_trylock(inode)) {
5196 ret = -EAGAIN;
5197 goto out;
5198 }
5199 } else {
5200 inode_lock(inode);
5201 }
5202
5203 if (f2fs_is_pinned_file(inode) &&
5204 !f2fs_overwrite_io(inode, pos, count)) {
5205 ret = -EIO;
5206 goto out_unlock;
5207 }
5208
5209 ret = f2fs_write_checks(iocb, from);
5210 if (ret <= 0)
5211 goto out_unlock;
5212
5213 /* Determine whether we will do a direct write or a buffered write. */
5214 dio = f2fs_should_use_dio(inode, iocb, from);
5215
5216 /* dio is not compatible w/ atomic write */
5217 if (dio && f2fs_is_atomic_file(inode)) {
5218 ret = -EOPNOTSUPP;
5219 goto out_unlock;
5220 }
5221
5222 /* Possibly preallocate the blocks for the write. */
5223 target_size = iocb->ki_pos + iov_iter_count(from);
5224 preallocated = f2fs_preallocate_blocks(iocb, from, dio);
5225 if (preallocated < 0) {
5226 ret = preallocated;
5227 } else {
5228 if (trace_f2fs_datawrite_start_enabled())
5229 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
5230 orig_count, WRITE);
5231
5232 /* Do the actual write. */
5233 ret = dio ?
5234 f2fs_dio_write_iter(iocb, from, &may_need_sync) :
5235 f2fs_buffered_write_iter(iocb, from);
5236
5237 trace_f2fs_datawrite_end(inode, orig_pos, ret);
5238 }
5239
5240 /* Don't leave any preallocated blocks around past i_size. */
5241 if (preallocated && i_size_read(inode) < target_size) {
5242 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
5243 filemap_invalidate_lock(inode->i_mapping);
5244 if (!f2fs_truncate(inode))
5245 file_dont_truncate(inode);
5246 filemap_invalidate_unlock(inode->i_mapping);
5247 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
5248 } else {
5249 file_dont_truncate(inode);
5250 }
5251
5252 clear_inode_flag(inode, FI_PREALLOCATED_ALL);
5253 out_unlock:
5254 inode_unlock(inode);
5255 out:
5256 trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
5257
5258 if (ret > 0 && may_need_sync)
5259 ret = generic_write_sync(iocb, ret);
5260
	/*
	 * If buffered I/O was forced, flush and drop the data from the page
	 * cache to preserve O_DIRECT semantics.
	 */
5264 if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT))
5265 f2fs_flush_buffered_write(iocb->ki_filp->f_mapping,
5266 orig_pos,
5267 orig_pos + ret - 1);
5268
5269 return ret;
5270 }
5271
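/*
 * ->fadvise() handler: POSIX_FADV_SEQUENTIAL scales readahead by
 * seq_file_ra_mul, POSIX_FADV_WILLNEED at offset 0 preloads the extent
 * cache, and after generic_fadvise() POSIX_FADV_DONTNEED invalidates the
 * compressed block cache while POSIX_FADV_NOREUSE registers a no-reuse
 * range.
 */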
static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
				int advice)
5274 {
5275 struct address_space *mapping;
5276 struct backing_dev_info *bdi;
5277 struct inode *inode = file_inode(filp);
5278 int err;
5279
5280 if (advice == POSIX_FADV_SEQUENTIAL) {
5281 if (S_ISFIFO(inode->i_mode))
5282 return -ESPIPE;
5283
5284 mapping = filp->f_mapping;
5285 if (!mapping || len < 0)
5286 return -EINVAL;
5287
5288 bdi = inode_to_bdi(mapping->host);
5289 filp->f_ra.ra_pages = bdi->ra_pages *
5290 F2FS_I_SB(inode)->seq_file_ra_mul;
5291 spin_lock(&filp->f_lock);
5292 filp->f_mode &= ~FMODE_RANDOM;
5293 spin_unlock(&filp->f_lock);
5294 return 0;
5295 } else if (advice == POSIX_FADV_WILLNEED && offset == 0) {
5296 /* Load extent cache at the first readahead. */
5297 f2fs_precache_extents(inode);
5298 }
5299
5300 err = generic_fadvise(filp, offset, len, advice);
5301 if (err)
5302 return err;
5303
5304 if (advice == POSIX_FADV_DONTNEED &&
5305 (test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
5306 f2fs_compressed_file(inode)))
5307 f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino);
5308 else if (advice == POSIX_FADV_NOREUSE)
5309 err = f2fs_keep_noreuse_range(inode, offset, len);
5310 return err;
5311 }
5312
5313 #ifdef CONFIG_COMPAT
5314 struct compat_f2fs_gc_range {
5315 u32 sync;
5316 compat_u64 start;
5317 compat_u64 len;
5318 };
5319 #define F2FS_IOC32_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11,\
5320 struct compat_f2fs_gc_range)
5321
static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg)
5323 {
5324 struct compat_f2fs_gc_range __user *urange;
5325 struct f2fs_gc_range range;
5326 int err;
5327
5328 urange = compat_ptr(arg);
5329 err = get_user(range.sync, &urange->sync);
5330 err |= get_user(range.start, &urange->start);
5331 err |= get_user(range.len, &urange->len);
5332 if (err)
5333 return -EFAULT;
5334
5335 return __f2fs_ioc_gc_range(file, &range);
5336 }
5337
5338 struct compat_f2fs_move_range {
5339 u32 dst_fd;
5340 compat_u64 pos_in;
5341 compat_u64 pos_out;
5342 compat_u64 len;
5343 };
5344 #define F2FS_IOC32_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \
5345 struct compat_f2fs_move_range)
5346
static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg)
5348 {
5349 struct compat_f2fs_move_range __user *urange;
5350 struct f2fs_move_range range;
5351 int err;
5352
5353 urange = compat_ptr(arg);
5354 err = get_user(range.dst_fd, &urange->dst_fd);
5355 err |= get_user(range.pos_in, &urange->pos_in);
5356 err |= get_user(range.pos_out, &urange->pos_out);
5357 err |= get_user(range.len, &urange->len);
5358 if (err)
5359 return -EFAULT;
5360
5361 return __f2fs_ioc_move_range(file, &range);
5362 }
5363
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
5365 {
5366 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
5367 return -EIO;
5368 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file))))
5369 return -ENOSPC;
5370
5371 switch (cmd) {
5372 case FS_IOC32_GETVERSION:
5373 cmd = FS_IOC_GETVERSION;
5374 break;
5375 case F2FS_IOC32_GARBAGE_COLLECT_RANGE:
5376 return f2fs_compat_ioc_gc_range(file, arg);
5377 case F2FS_IOC32_MOVE_RANGE:
5378 return f2fs_compat_ioc_move_range(file, arg);
5379 case F2FS_IOC_START_ATOMIC_WRITE:
5380 case F2FS_IOC_START_ATOMIC_REPLACE:
5381 case F2FS_IOC_COMMIT_ATOMIC_WRITE:
5382 case F2FS_IOC_START_VOLATILE_WRITE:
5383 case F2FS_IOC_RELEASE_VOLATILE_WRITE:
5384 case F2FS_IOC_ABORT_ATOMIC_WRITE:
5385 case F2FS_IOC_SHUTDOWN:
5386 case FITRIM:
5387 case FS_IOC_SET_ENCRYPTION_POLICY:
5388 case FS_IOC_GET_ENCRYPTION_PWSALT:
5389 case FS_IOC_GET_ENCRYPTION_POLICY:
5390 case FS_IOC_GET_ENCRYPTION_POLICY_EX:
5391 case FS_IOC_ADD_ENCRYPTION_KEY:
5392 case FS_IOC_REMOVE_ENCRYPTION_KEY:
5393 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
5394 case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
5395 case FS_IOC_GET_ENCRYPTION_NONCE:
5396 case F2FS_IOC_GARBAGE_COLLECT:
5397 case F2FS_IOC_WRITE_CHECKPOINT:
5398 case F2FS_IOC_DEFRAGMENT:
5399 case F2FS_IOC_FLUSH_DEVICE:
5400 case F2FS_IOC_GET_FEATURES:
5401 case F2FS_IOC_GET_PIN_FILE:
5402 case F2FS_IOC_SET_PIN_FILE:
5403 case F2FS_IOC_PRECACHE_EXTENTS:
5404 case F2FS_IOC_RESIZE_FS:
5405 case FS_IOC_ENABLE_VERITY:
5406 case FS_IOC_MEASURE_VERITY:
5407 case FS_IOC_READ_VERITY_METADATA:
5408 case FS_IOC_GETFSLABEL:
5409 case FS_IOC_SETFSLABEL:
5410 case F2FS_IOC_GET_COMPRESS_BLOCKS:
5411 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
5412 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
5413 case F2FS_IOC_SEC_TRIM_FILE:
5414 case F2FS_IOC_GET_COMPRESS_OPTION:
5415 case F2FS_IOC_SET_COMPRESS_OPTION:
5416 case F2FS_IOC_DECOMPRESS_FILE:
5417 case F2FS_IOC_COMPRESS_FILE:
5418 case F2FS_IOC_GET_DEV_ALIAS_FILE:
5419 case F2FS_IOC_IO_PRIO:
5420 break;
5421 default:
5422 return -ENOIOCTLCMD;
5423 }
5424 return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
5425 }
5426 #endif
5427
5428 const struct file_operations f2fs_file_operations = {
5429 .llseek = f2fs_llseek,
5430 .read_iter = f2fs_file_read_iter,
5431 .write_iter = f2fs_file_write_iter,
5432 .iopoll = iocb_bio_iopoll,
5433 .open = f2fs_file_open,
5434 .release = f2fs_release_file,
5435 .mmap_prepare = f2fs_file_mmap_prepare,
5436 .flush = f2fs_file_flush,
5437 .fsync = f2fs_sync_file,
5438 .fallocate = f2fs_fallocate,
5439 .unlocked_ioctl = f2fs_ioctl,
5440 #ifdef CONFIG_COMPAT
5441 .compat_ioctl = f2fs_compat_ioctl,
5442 #endif
5443 .splice_read = f2fs_file_splice_read,
5444 .splice_write = iter_file_splice_write,
5445 .fadvise = f2fs_file_fadvise,
5446 .fop_flags = FOP_BUFFER_RASYNC,
5447 };
5448