1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * fs/f2fs/file.c 4 * 5 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 6 * http://www.samsung.com/ 7 */ 8 #include <linux/blk-crypto.h> 9 #include <linux/fs.h> 10 #include <linux/f2fs_fs.h> 11 #include <linux/stat.h> 12 #include <linux/writeback.h> 13 #include <linux/blkdev.h> 14 #include <linux/falloc.h> 15 #include <linux/filelock.h> 16 #include <linux/types.h> 17 #include <linux/compat.h> 18 #include <linux/uaccess.h> 19 #include <linux/mount.h> 20 #include <linux/pagevec.h> 21 #include <linux/uio.h> 22 #include <linux/uuid.h> 23 #include <linux/file.h> 24 #include <linux/nls.h> 25 #include <linux/sched/signal.h> 26 #include <linux/fileattr.h> 27 #include <linux/fadvise.h> 28 #include <linux/iomap.h> 29 30 #include "f2fs.h" 31 #include "node.h" 32 #include "segment.h" 33 #include "xattr.h" 34 #include "acl.h" 35 #include "gc.h" 36 #include "iostat.h" 37 #include <trace/events/f2fs.h> 38 #include <uapi/linux/f2fs.h> 39 40 static void f2fs_zero_post_eof_page(struct inode *inode, 41 loff_t new_size, bool lock) 42 { 43 loff_t old_size = i_size_read(inode); 44 45 if (old_size >= new_size) 46 return; 47 48 if (mapping_empty(inode->i_mapping)) 49 return; 50 51 if (lock) 52 filemap_invalidate_lock(inode->i_mapping); 53 /* zero or drop pages only in range of [old_size, new_size] */ 54 truncate_inode_pages_range(inode->i_mapping, old_size, new_size); 55 if (lock) 56 filemap_invalidate_unlock(inode->i_mapping); 57 } 58 59 static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) 60 { 61 struct inode *inode = file_inode(vmf->vma->vm_file); 62 vm_flags_t flags = vmf->vma->vm_flags; 63 vm_fault_t ret; 64 65 ret = filemap_fault(vmf); 66 if (ret & VM_FAULT_LOCKED) 67 f2fs_update_iostat(F2FS_I_SB(inode), inode, 68 APP_MAPPED_READ_IO, F2FS_BLKSIZE); 69 70 trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret); 71 72 return ret; 73 } 74 75 static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) 76 { 77 struct folio *folio = page_folio(vmf->page); 78 struct inode *inode = file_inode(vmf->vma->vm_file); 79 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 80 struct dnode_of_data dn; 81 bool need_alloc = !f2fs_is_pinned_file(inode); 82 int err = 0; 83 vm_fault_t ret; 84 85 if (unlikely(IS_IMMUTABLE(inode))) 86 return VM_FAULT_SIGBUS; 87 88 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 89 err = -EIO; 90 goto out; 91 } 92 93 if (unlikely(f2fs_cp_error(sbi))) { 94 err = -EIO; 95 goto out; 96 } 97 98 if (!f2fs_is_checkpoint_ready(sbi)) { 99 err = -ENOSPC; 100 goto out; 101 } 102 103 err = f2fs_convert_inline_inode(inode); 104 if (err) 105 goto out; 106 107 #ifdef CONFIG_F2FS_FS_COMPRESSION 108 if (f2fs_compressed_file(inode)) { 109 int ret = f2fs_is_compressed_cluster(inode, folio->index); 110 111 if (ret < 0) { 112 err = ret; 113 goto out; 114 } else if (ret) { 115 need_alloc = false; 116 } 117 } 118 #endif 119 /* should do out of any locked page */ 120 if (need_alloc) 121 f2fs_balance_fs(sbi, true); 122 123 sb_start_pagefault(inode->i_sb); 124 125 f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); 126 127 f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT, true); 128 129 file_update_time(vmf->vma->vm_file); 130 filemap_invalidate_lock_shared(inode->i_mapping); 131 132 folio_lock(folio); 133 if (unlikely(folio->mapping != inode->i_mapping || 134 folio_pos(folio) > i_size_read(inode) || 135 !folio_test_uptodate(folio))) { 136 folio_unlock(folio); 137 err = -EFAULT; 138 goto out_sem; 139 } 140 141 set_new_dnode(&dn, inode, NULL, NULL, 0); 142 if 
(need_alloc) { 143 /* block allocation */ 144 err = f2fs_get_block_locked(&dn, folio->index); 145 } else { 146 err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); 147 f2fs_put_dnode(&dn); 148 if (f2fs_is_pinned_file(inode) && 149 !__is_valid_data_blkaddr(dn.data_blkaddr)) 150 err = -EIO; 151 } 152 153 if (err) { 154 folio_unlock(folio); 155 goto out_sem; 156 } 157 158 f2fs_folio_wait_writeback(folio, DATA, false, true); 159 160 /* wait for GCed page writeback via META_MAPPING */ 161 f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); 162 163 /* 164 * check to see if the page is mapped already (no holes) 165 */ 166 if (folio_test_mappedtodisk(folio)) 167 goto out_sem; 168 169 /* page is wholly or partially inside EOF */ 170 if (((loff_t)(folio->index + 1) << PAGE_SHIFT) > 171 i_size_read(inode)) { 172 loff_t offset; 173 174 offset = i_size_read(inode) & ~PAGE_MASK; 175 folio_zero_segment(folio, offset, folio_size(folio)); 176 } 177 folio_mark_dirty(folio); 178 179 f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE); 180 f2fs_update_time(sbi, REQ_TIME); 181 182 out_sem: 183 filemap_invalidate_unlock_shared(inode->i_mapping); 184 185 sb_end_pagefault(inode->i_sb); 186 out: 187 ret = vmf_fs_error(err); 188 189 trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret); 190 return ret; 191 } 192 193 static const struct vm_operations_struct f2fs_file_vm_ops = { 194 .fault = f2fs_filemap_fault, 195 .map_pages = filemap_map_pages, 196 .page_mkwrite = f2fs_vm_page_mkwrite, 197 }; 198 199 static int get_parent_ino(struct inode *inode, nid_t *pino) 200 { 201 struct dentry *dentry; 202 203 /* 204 * Make sure to get the non-deleted alias. The alias associated with 205 * the open file descriptor being fsync()'ed may be deleted already. 
206 */ 207 dentry = d_find_alias(inode); 208 if (!dentry) 209 return 0; 210 211 *pino = d_parent_ino(dentry); 212 dput(dentry); 213 return 1; 214 } 215 216 static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) 217 { 218 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 219 enum cp_reason_type cp_reason = CP_NO_NEEDED; 220 221 if (!S_ISREG(inode->i_mode)) 222 cp_reason = CP_NON_REGULAR; 223 else if (f2fs_compressed_file(inode)) 224 cp_reason = CP_COMPRESSED; 225 else if (inode->i_nlink != 1) 226 cp_reason = CP_HARDLINK; 227 else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) 228 cp_reason = CP_SB_NEED_CP; 229 else if (file_wrong_pino(inode)) 230 cp_reason = CP_WRONG_PINO; 231 else if (!f2fs_space_for_roll_forward(sbi)) 232 cp_reason = CP_NO_SPC_ROLL; 233 else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) 234 cp_reason = CP_NODE_NEED_CP; 235 else if (test_opt(sbi, FASTBOOT)) 236 cp_reason = CP_FASTBOOT_MODE; 237 else if (F2FS_OPTION(sbi).active_logs == 2) 238 cp_reason = CP_SPEC_LOG_NUM; 239 else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && 240 f2fs_need_dentry_mark(sbi, inode->i_ino) && 241 f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, 242 TRANS_DIR_INO)) 243 cp_reason = CP_RECOVER_DIR; 244 else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, 245 XATTR_DIR_INO)) 246 cp_reason = CP_XATTR_DIR; 247 248 return cp_reason; 249 } 250 251 static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) 252 { 253 struct folio *i = filemap_get_folio(NODE_MAPPING(sbi), ino); 254 bool ret = false; 255 /* But we need to avoid that there are some inode updates */ 256 if ((!IS_ERR(i) && folio_test_dirty(i)) || 257 f2fs_need_inode_block_update(sbi, ino)) 258 ret = true; 259 f2fs_folio_put(i, false); 260 return ret; 261 } 262 263 static void try_to_fix_pino(struct inode *inode) 264 { 265 struct f2fs_inode_info *fi = F2FS_I(inode); 266 nid_t pino; 267 268 f2fs_down_write(&fi->i_sem); 269 if (file_wrong_pino(inode) && inode->i_nlink == 1 && 270 get_parent_ino(inode, &pino)) { 271 f2fs_i_pino_write(inode, pino); 272 file_got_pino(inode); 273 } 274 f2fs_up_write(&fi->i_sem); 275 } 276 277 static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, 278 int datasync, bool atomic) 279 { 280 struct inode *inode = file->f_mapping->host; 281 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 282 nid_t ino = inode->i_ino; 283 int ret = 0; 284 enum cp_reason_type cp_reason = 0; 285 struct writeback_control wbc = { 286 .sync_mode = WB_SYNC_ALL, 287 .nr_to_write = LONG_MAX, 288 }; 289 unsigned int seq_id = 0; 290 291 if (unlikely(f2fs_readonly(inode->i_sb))) 292 return 0; 293 294 trace_f2fs_sync_file_enter(inode); 295 296 if (S_ISDIR(inode->i_mode)) 297 goto go_write; 298 299 /* if fdatasync is triggered, let's do in-place-update */ 300 if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) 301 set_inode_flag(inode, FI_NEED_IPU); 302 ret = file_write_and_wait_range(file, start, end); 303 clear_inode_flag(inode, FI_NEED_IPU); 304 305 if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { 306 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); 307 return ret; 308 } 309 310 /* if the inode is dirty, let's recover all the time */ 311 if (!f2fs_skip_inode_update(inode, datasync)) { 312 f2fs_write_inode(inode, NULL); 313 goto go_write; 314 } 315 316 /* 317 * if there is no written data, don't waste time to write recovery info. 
318 */ 319 if (!is_inode_flag_set(inode, FI_APPEND_WRITE) && 320 !f2fs_exist_written_data(sbi, ino, APPEND_INO)) { 321 322 /* it may call write_inode just prior to fsync */ 323 if (need_inode_page_update(sbi, ino)) 324 goto go_write; 325 326 if (is_inode_flag_set(inode, FI_UPDATE_WRITE) || 327 f2fs_exist_written_data(sbi, ino, UPDATE_INO)) 328 goto flush_out; 329 goto out; 330 } else { 331 /* 332 * for OPU case, during fsync(), node can be persisted before 333 * data when lower device doesn't support write barrier, result 334 * in data corruption after SPO. 335 * So for strict fsync mode, force to use atomic write semantics 336 * to keep write order in between data/node and last node to 337 * avoid potential data corruption. 338 */ 339 if (F2FS_OPTION(sbi).fsync_mode == 340 FSYNC_MODE_STRICT && !atomic) 341 atomic = true; 342 } 343 go_write: 344 /* 345 * Both of fdatasync() and fsync() are able to be recovered from 346 * sudden-power-off. 347 */ 348 f2fs_down_read(&F2FS_I(inode)->i_sem); 349 cp_reason = need_do_checkpoint(inode); 350 f2fs_up_read(&F2FS_I(inode)->i_sem); 351 352 if (cp_reason) { 353 /* all the dirty node pages should be flushed for POR */ 354 ret = f2fs_sync_fs(inode->i_sb, 1); 355 356 /* 357 * We've secured consistency through sync_fs. Following pino 358 * will be used only for fsynced inodes after checkpoint. 359 */ 360 try_to_fix_pino(inode); 361 clear_inode_flag(inode, FI_APPEND_WRITE); 362 clear_inode_flag(inode, FI_UPDATE_WRITE); 363 goto out; 364 } 365 sync_nodes: 366 atomic_inc(&sbi->wb_sync_req[NODE]); 367 ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id); 368 atomic_dec(&sbi->wb_sync_req[NODE]); 369 if (ret) 370 goto out; 371 372 /* if cp_error was enabled, we should avoid infinite loop */ 373 if (unlikely(f2fs_cp_error(sbi))) { 374 ret = -EIO; 375 goto out; 376 } 377 378 if (f2fs_need_inode_block_update(sbi, ino)) { 379 f2fs_mark_inode_dirty_sync(inode, true); 380 f2fs_write_inode(inode, NULL); 381 goto sync_nodes; 382 } 383 384 /* 385 * If it's atomic_write, it's just fine to keep write ordering. So 386 * here we don't need to wait for node write completion, since we use 387 * node chain which serializes node blocks. If one of node writes are 388 * reordered, we can see simply broken chain, resulting in stopping 389 * roll-forward recovery. It means we'll recover all or none node blocks 390 * given fsync mark. 
391 */ 392 if (!atomic) { 393 ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id); 394 if (ret) 395 goto out; 396 } 397 398 /* once recovery info is written, don't need to tack this */ 399 f2fs_remove_ino_entry(sbi, ino, APPEND_INO); 400 clear_inode_flag(inode, FI_APPEND_WRITE); 401 flush_out: 402 if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) 403 ret = f2fs_issue_flush(sbi, inode->i_ino); 404 if (!ret) { 405 f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); 406 clear_inode_flag(inode, FI_UPDATE_WRITE); 407 f2fs_remove_ino_entry(sbi, ino, FLUSH_INO); 408 } 409 f2fs_update_time(sbi, REQ_TIME); 410 out: 411 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); 412 return ret; 413 } 414 415 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 416 { 417 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) 418 return -EIO; 419 return f2fs_do_sync_file(file, start, end, datasync, false); 420 } 421 422 static bool __found_offset(struct address_space *mapping, 423 struct dnode_of_data *dn, pgoff_t index, int whence) 424 { 425 block_t blkaddr = f2fs_data_blkaddr(dn); 426 struct inode *inode = mapping->host; 427 bool compressed_cluster = false; 428 429 if (f2fs_compressed_file(inode)) { 430 block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_folio, 431 ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size)); 432 433 compressed_cluster = first_blkaddr == COMPRESS_ADDR; 434 } 435 436 switch (whence) { 437 case SEEK_DATA: 438 if (__is_valid_data_blkaddr(blkaddr)) 439 return true; 440 if (blkaddr == NEW_ADDR && 441 xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY)) 442 return true; 443 if (compressed_cluster) 444 return true; 445 break; 446 case SEEK_HOLE: 447 if (compressed_cluster) 448 return false; 449 if (blkaddr == NULL_ADDR) 450 return true; 451 break; 452 } 453 return false; 454 } 455 456 static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) 457 { 458 struct inode *inode = file->f_mapping->host; 459 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 460 struct dnode_of_data dn; 461 pgoff_t pgofs, end_offset; 462 loff_t data_ofs = offset; 463 loff_t isize; 464 int err = 0; 465 466 inode_lock_shared(inode); 467 468 isize = i_size_read(inode); 469 if (offset >= isize) 470 goto fail; 471 472 /* handle inline data case */ 473 if (f2fs_has_inline_data(inode)) { 474 if (whence == SEEK_HOLE) { 475 data_ofs = isize; 476 goto found; 477 } else if (whence == SEEK_DATA) { 478 data_ofs = offset; 479 goto found; 480 } 481 } 482 483 pgofs = (pgoff_t)(offset >> PAGE_SHIFT); 484 485 for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) { 486 set_new_dnode(&dn, inode, NULL, NULL, 0); 487 err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); 488 if (err && err != -ENOENT) { 489 goto fail; 490 } else if (err == -ENOENT) { 491 /* direct node does not exists */ 492 if (whence == SEEK_DATA) { 493 pgofs = f2fs_get_next_page_offset(&dn, pgofs); 494 continue; 495 } else { 496 goto found; 497 } 498 } 499 500 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 501 502 /* find data/hole in dnode block */ 503 for (; dn.ofs_in_node < end_offset; 504 dn.ofs_in_node++, pgofs++, 505 data_ofs = (loff_t)pgofs << PAGE_SHIFT) { 506 block_t blkaddr; 507 508 blkaddr = f2fs_data_blkaddr(&dn); 509 510 if (__is_valid_data_blkaddr(blkaddr) && 511 !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), 512 blkaddr, DATA_GENERIC_ENHANCE)) { 513 f2fs_put_dnode(&dn); 514 goto fail; 515 } 516 517 if (__found_offset(file->f_mapping, &dn, 518 pgofs, 
whence)) { 519 f2fs_put_dnode(&dn); 520 goto found; 521 } 522 } 523 f2fs_put_dnode(&dn); 524 } 525 526 if (whence == SEEK_DATA) 527 goto fail; 528 found: 529 if (whence == SEEK_HOLE && data_ofs > isize) 530 data_ofs = isize; 531 inode_unlock_shared(inode); 532 return vfs_setpos(file, data_ofs, maxbytes); 533 fail: 534 inode_unlock_shared(inode); 535 return -ENXIO; 536 } 537 538 static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) 539 { 540 struct inode *inode = file->f_mapping->host; 541 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 542 543 switch (whence) { 544 case SEEK_SET: 545 case SEEK_CUR: 546 case SEEK_END: 547 return generic_file_llseek_size(file, offset, whence, 548 maxbytes, i_size_read(inode)); 549 case SEEK_DATA: 550 case SEEK_HOLE: 551 if (offset < 0) 552 return -ENXIO; 553 return f2fs_seek_block(file, offset, whence); 554 } 555 556 return -EINVAL; 557 } 558 559 static int f2fs_file_mmap_prepare(struct vm_area_desc *desc) 560 { 561 struct file *file = desc->file; 562 struct inode *inode = file_inode(file); 563 564 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 565 return -EIO; 566 567 if (!f2fs_is_compress_backend_ready(inode)) 568 return -EOPNOTSUPP; 569 570 file_accessed(file); 571 desc->vm_ops = &f2fs_file_vm_ops; 572 573 f2fs_down_read(&F2FS_I(inode)->i_sem); 574 set_inode_flag(inode, FI_MMAP_FILE); 575 f2fs_up_read(&F2FS_I(inode)->i_sem); 576 577 return 0; 578 } 579 580 static int finish_preallocate_blocks(struct inode *inode) 581 { 582 int ret = 0; 583 bool opened; 584 585 f2fs_down_read(&F2FS_I(inode)->i_sem); 586 opened = is_inode_flag_set(inode, FI_OPENED_FILE); 587 f2fs_up_read(&F2FS_I(inode)->i_sem); 588 if (opened) 589 return 0; 590 591 inode_lock(inode); 592 if (is_inode_flag_set(inode, FI_OPENED_FILE)) 593 goto out_unlock; 594 595 if (!file_should_truncate(inode)) 596 goto out_update; 597 598 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 599 filemap_invalidate_lock(inode->i_mapping); 600 601 truncate_setsize(inode, i_size_read(inode)); 602 ret = f2fs_truncate(inode); 603 604 filemap_invalidate_unlock(inode->i_mapping); 605 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 606 if (ret) 607 goto out_unlock; 608 609 file_dont_truncate(inode); 610 out_update: 611 f2fs_down_write(&F2FS_I(inode)->i_sem); 612 set_inode_flag(inode, FI_OPENED_FILE); 613 f2fs_up_write(&F2FS_I(inode)->i_sem); 614 out_unlock: 615 inode_unlock(inode); 616 return ret; 617 } 618 619 static int f2fs_file_open(struct inode *inode, struct file *filp) 620 { 621 int err = fscrypt_file_open(inode, filp); 622 623 if (err) 624 return err; 625 626 if (!f2fs_is_compress_backend_ready(inode)) 627 return -EOPNOTSUPP; 628 629 if (mapping_large_folio_support(inode->i_mapping) && 630 filp->f_mode & FMODE_WRITE) 631 return -EOPNOTSUPP; 632 633 err = fsverity_file_open(inode, filp); 634 if (err) 635 return err; 636 637 filp->f_mode |= FMODE_NOWAIT; 638 filp->f_mode |= FMODE_CAN_ODIRECT; 639 640 err = dquot_file_open(inode, filp); 641 if (err) 642 return err; 643 644 err = finish_preallocate_blocks(inode); 645 if (!err) 646 atomic_inc(&F2FS_I(inode)->open_count); 647 return err; 648 } 649 650 void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) 651 { 652 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 653 int nr_free = 0, ofs = dn->ofs_in_node, len = count; 654 __le32 *addr; 655 bool compressed_cluster = false; 656 int cluster_index = 0, valid_blocks = 0; 657 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 658 bool released = 
!atomic_read(&F2FS_I(dn->inode)->i_compr_blocks); 659 block_t blkstart; 660 int blklen = 0; 661 662 addr = get_dnode_addr(dn->inode, dn->node_folio) + ofs; 663 blkstart = le32_to_cpu(*addr); 664 665 /* Assumption: truncation starts with cluster */ 666 for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { 667 block_t blkaddr = le32_to_cpu(*addr); 668 669 if (f2fs_compressed_file(dn->inode) && 670 !(cluster_index & (cluster_size - 1))) { 671 if (compressed_cluster) 672 f2fs_i_compr_blocks_update(dn->inode, 673 valid_blocks, false); 674 compressed_cluster = (blkaddr == COMPRESS_ADDR); 675 valid_blocks = 0; 676 } 677 678 if (blkaddr == NULL_ADDR) 679 goto next; 680 681 f2fs_set_data_blkaddr(dn, NULL_ADDR); 682 683 if (__is_valid_data_blkaddr(blkaddr)) { 684 if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE)) 685 goto next; 686 if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr, 687 DATA_GENERIC_ENHANCE)) 688 goto next; 689 if (compressed_cluster) 690 valid_blocks++; 691 } 692 693 if (blkstart + blklen == blkaddr) { 694 blklen++; 695 } else { 696 f2fs_invalidate_blocks(sbi, blkstart, blklen); 697 blkstart = blkaddr; 698 blklen = 1; 699 } 700 701 if (!released || blkaddr != COMPRESS_ADDR) 702 nr_free++; 703 704 continue; 705 706 next: 707 if (blklen) 708 f2fs_invalidate_blocks(sbi, blkstart, blklen); 709 710 blkstart = le32_to_cpu(*(addr + 1)); 711 blklen = 0; 712 } 713 714 if (blklen) 715 f2fs_invalidate_blocks(sbi, blkstart, blklen); 716 717 if (compressed_cluster) 718 f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false); 719 720 if (nr_free) { 721 pgoff_t fofs; 722 /* 723 * once we invalidate valid blkaddr in range [ofs, ofs + count], 724 * we will invalidate all blkaddr in the whole range. 725 */ 726 fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), 727 dn->inode) + ofs; 728 f2fs_update_read_extent_cache_range(dn, fofs, 0, len); 729 f2fs_update_age_extent_cache_range(dn, fofs, len); 730 dec_valid_block_count(sbi, dn->inode, nr_free); 731 } 732 dn->ofs_in_node = ofs; 733 734 f2fs_update_time(sbi, REQ_TIME); 735 trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, 736 dn->ofs_in_node, nr_free); 737 } 738 739 static int truncate_partial_data_page(struct inode *inode, u64 from, 740 bool cache_only) 741 { 742 loff_t offset = from & (PAGE_SIZE - 1); 743 pgoff_t index = from >> PAGE_SHIFT; 744 struct address_space *mapping = inode->i_mapping; 745 struct folio *folio; 746 747 if (!offset && !cache_only) 748 return 0; 749 750 if (cache_only) { 751 folio = filemap_lock_folio(mapping, index); 752 if (IS_ERR(folio)) 753 return 0; 754 if (folio_test_uptodate(folio)) 755 goto truncate_out; 756 f2fs_folio_put(folio, true); 757 return 0; 758 } 759 760 folio = f2fs_get_lock_data_folio(inode, index, true); 761 if (IS_ERR(folio)) 762 return PTR_ERR(folio) == -ENOENT ? 0 : PTR_ERR(folio); 763 truncate_out: 764 f2fs_folio_wait_writeback(folio, DATA, true, true); 765 folio_zero_segment(folio, offset, folio_size(folio)); 766 767 /* An encrypted inode should have a key and truncate the last page. 
*/ 768 f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode)); 769 if (!cache_only) 770 folio_mark_dirty(folio); 771 f2fs_folio_put(folio, true); 772 return 0; 773 } 774 775 int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) 776 { 777 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 778 struct dnode_of_data dn; 779 struct f2fs_lock_context lc; 780 pgoff_t free_from; 781 int count = 0, err = 0; 782 struct folio *ifolio; 783 bool truncate_page = false; 784 785 trace_f2fs_truncate_blocks_enter(inode, from); 786 787 if (IS_DEVICE_ALIASING(inode) && from) { 788 err = -EINVAL; 789 goto out_err; 790 } 791 792 free_from = (pgoff_t)F2FS_BLK_ALIGN(from); 793 794 if (free_from >= max_file_blocks(inode)) 795 goto free_partial; 796 797 if (lock) 798 f2fs_lock_op(sbi, &lc); 799 800 ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); 801 if (IS_ERR(ifolio)) { 802 err = PTR_ERR(ifolio); 803 goto out; 804 } 805 806 if (IS_DEVICE_ALIASING(inode)) { 807 struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; 808 struct extent_info ei = et->largest; 809 810 f2fs_invalidate_blocks(sbi, ei.blk, ei.len); 811 812 dec_valid_block_count(sbi, inode, ei.len); 813 f2fs_update_time(sbi, REQ_TIME); 814 815 f2fs_folio_put(ifolio, true); 816 goto out; 817 } 818 819 if (f2fs_has_inline_data(inode)) { 820 f2fs_truncate_inline_inode(inode, ifolio, from); 821 f2fs_folio_put(ifolio, true); 822 truncate_page = true; 823 goto out; 824 } 825 826 set_new_dnode(&dn, inode, ifolio, NULL, 0); 827 err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); 828 if (err) { 829 if (err == -ENOENT) 830 goto free_next; 831 goto out; 832 } 833 834 count = ADDRS_PER_PAGE(dn.node_folio, inode); 835 836 count -= dn.ofs_in_node; 837 f2fs_bug_on(sbi, count < 0); 838 839 if (dn.ofs_in_node || IS_INODE(dn.node_folio)) { 840 f2fs_truncate_data_blocks_range(&dn, count); 841 free_from += count; 842 } 843 844 f2fs_put_dnode(&dn); 845 free_next: 846 err = f2fs_truncate_inode_blocks(inode, free_from); 847 out: 848 if (lock) 849 f2fs_unlock_op(sbi, &lc); 850 free_partial: 851 /* lastly zero out the first data page */ 852 if (!err) 853 err = truncate_partial_data_page(inode, from, truncate_page); 854 out_err: 855 trace_f2fs_truncate_blocks_exit(inode, err); 856 return err; 857 } 858 859 int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) 860 { 861 u64 free_from = from; 862 int err; 863 864 #ifdef CONFIG_F2FS_FS_COMPRESSION 865 /* 866 * for compressed file, only support cluster size 867 * aligned truncation. 868 */ 869 if (f2fs_compressed_file(inode)) 870 free_from = round_up(from, 871 F2FS_I(inode)->i_cluster_size << PAGE_SHIFT); 872 #endif 873 874 err = f2fs_do_truncate_blocks(inode, free_from, lock); 875 if (err) 876 return err; 877 878 #ifdef CONFIG_F2FS_FS_COMPRESSION 879 /* 880 * For compressed file, after release compress blocks, don't allow write 881 * direct, but we should allow write direct after truncate to zero. 
882 */ 883 if (f2fs_compressed_file(inode) && !free_from 884 && is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) 885 clear_inode_flag(inode, FI_COMPRESS_RELEASED); 886 887 if (from != free_from) { 888 err = f2fs_truncate_partial_cluster(inode, from, lock); 889 if (err) 890 return err; 891 } 892 #endif 893 894 return 0; 895 } 896 897 int f2fs_truncate(struct inode *inode) 898 { 899 int err; 900 901 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 902 return -EIO; 903 904 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 905 S_ISLNK(inode->i_mode))) 906 return 0; 907 908 trace_f2fs_truncate(inode); 909 910 if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) 911 return -EIO; 912 913 err = f2fs_dquot_initialize(inode); 914 if (err) 915 return err; 916 917 /* we should check inline_data size */ 918 if (!f2fs_may_inline_data(inode)) { 919 err = f2fs_convert_inline_inode(inode); 920 if (err) { 921 /* 922 * Always truncate page #0 to avoid page cache 923 * leak in evict() path. 924 */ 925 truncate_inode_pages_range(inode->i_mapping, 926 F2FS_BLK_TO_BYTES(0), 927 F2FS_BLK_END_BYTES(0)); 928 return err; 929 } 930 } 931 932 err = f2fs_truncate_blocks(inode, i_size_read(inode), true); 933 if (err) 934 return err; 935 936 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 937 f2fs_mark_inode_dirty_sync(inode, false); 938 return 0; 939 } 940 941 static bool f2fs_force_buffered_io(struct inode *inode, int rw) 942 { 943 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 944 945 if (!fscrypt_dio_supported(inode)) 946 return true; 947 if (fsverity_active(inode)) 948 return true; 949 if (f2fs_compressed_file(inode)) 950 return true; 951 /* 952 * only force direct read to use buffered IO, for direct write, 953 * it expects inline data conversion before committing IO. 954 */ 955 if (f2fs_has_inline_data(inode) && rw == READ) 956 return true; 957 958 /* disallow direct IO if any of devices has unaligned blksize */ 959 if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) 960 return true; 961 /* 962 * for blkzoned device, fallback direct IO to buffered IO, so 963 * all IOs can be serialized by log-structured write. 964 */ 965 if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) && 966 !f2fs_is_pinned_file(inode)) 967 return true; 968 if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) 969 return true; 970 971 return false; 972 } 973 974 int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path, 975 struct kstat *stat, u32 request_mask, unsigned int query_flags) 976 { 977 struct inode *inode = d_inode(path->dentry); 978 struct f2fs_inode_info *fi = F2FS_I(inode); 979 struct f2fs_inode *ri = NULL; 980 unsigned int flags; 981 982 if (f2fs_has_extra_attr(inode) && 983 f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && 984 F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { 985 stat->result_mask |= STATX_BTIME; 986 stat->btime.tv_sec = fi->i_crtime.tv_sec; 987 stat->btime.tv_nsec = fi->i_crtime.tv_nsec; 988 } 989 990 /* 991 * Return the DIO alignment restrictions if requested. We only return 992 * this information when requested, since on encrypted files it might 993 * take a fair bit of work to get if the file wasn't opened recently. 994 * 995 * f2fs sometimes supports DIO reads but not DIO writes. STATX_DIOALIGN 996 * cannot represent that, so in that case we report no DIO support. 
997 */ 998 if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) { 999 unsigned int bsize = i_blocksize(inode); 1000 1001 stat->result_mask |= STATX_DIOALIGN; 1002 if (!f2fs_force_buffered_io(inode, WRITE)) { 1003 stat->dio_mem_align = bsize; 1004 stat->dio_offset_align = bsize; 1005 } 1006 } 1007 1008 flags = fi->i_flags; 1009 if (flags & F2FS_COMPR_FL) 1010 stat->attributes |= STATX_ATTR_COMPRESSED; 1011 if (flags & F2FS_APPEND_FL) 1012 stat->attributes |= STATX_ATTR_APPEND; 1013 if (IS_ENCRYPTED(inode)) 1014 stat->attributes |= STATX_ATTR_ENCRYPTED; 1015 if (flags & F2FS_IMMUTABLE_FL) 1016 stat->attributes |= STATX_ATTR_IMMUTABLE; 1017 if (flags & F2FS_NODUMP_FL) 1018 stat->attributes |= STATX_ATTR_NODUMP; 1019 if (IS_VERITY(inode)) 1020 stat->attributes |= STATX_ATTR_VERITY; 1021 1022 stat->attributes_mask |= (STATX_ATTR_COMPRESSED | 1023 STATX_ATTR_APPEND | 1024 STATX_ATTR_ENCRYPTED | 1025 STATX_ATTR_IMMUTABLE | 1026 STATX_ATTR_NODUMP | 1027 STATX_ATTR_VERITY); 1028 1029 generic_fillattr(idmap, request_mask, inode, stat); 1030 1031 /* we need to show initial sectors used for inline_data/dentries */ 1032 if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) || 1033 f2fs_has_inline_dentry(inode)) 1034 stat->blocks += (stat->size + 511) >> 9; 1035 1036 return 0; 1037 } 1038 1039 #ifdef CONFIG_F2FS_FS_POSIX_ACL 1040 static void __setattr_copy(struct mnt_idmap *idmap, 1041 struct inode *inode, const struct iattr *attr) 1042 { 1043 unsigned int ia_valid = attr->ia_valid; 1044 1045 i_uid_update(idmap, attr, inode); 1046 i_gid_update(idmap, attr, inode); 1047 if (ia_valid & ATTR_ATIME) 1048 inode_set_atime_to_ts(inode, attr->ia_atime); 1049 if (ia_valid & ATTR_MTIME) 1050 inode_set_mtime_to_ts(inode, attr->ia_mtime); 1051 if (ia_valid & ATTR_CTIME) 1052 inode_set_ctime_to_ts(inode, attr->ia_ctime); 1053 if (ia_valid & ATTR_MODE) { 1054 umode_t mode = attr->ia_mode; 1055 1056 if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) 1057 mode &= ~S_ISGID; 1058 set_acl_inode(inode, mode); 1059 } 1060 } 1061 #else 1062 #define __setattr_copy setattr_copy 1063 #endif 1064 1065 int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 1066 struct iattr *attr) 1067 { 1068 struct inode *inode = d_inode(dentry); 1069 struct f2fs_inode_info *fi = F2FS_I(inode); 1070 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1071 int err; 1072 1073 if (unlikely(f2fs_cp_error(sbi))) 1074 return -EIO; 1075 1076 err = setattr_prepare(idmap, dentry, attr); 1077 if (err) 1078 return err; 1079 1080 err = fscrypt_prepare_setattr(dentry, attr); 1081 if (err) 1082 return err; 1083 1084 if (unlikely(IS_IMMUTABLE(inode))) 1085 return -EPERM; 1086 1087 if (unlikely(IS_APPEND(inode) && 1088 (attr->ia_valid & (ATTR_MODE | ATTR_UID | 1089 ATTR_GID | ATTR_TIMES_SET)))) 1090 return -EPERM; 1091 1092 if ((attr->ia_valid & ATTR_SIZE)) { 1093 if (!f2fs_is_compress_backend_ready(inode) || 1094 IS_DEVICE_ALIASING(inode)) 1095 return -EOPNOTSUPP; 1096 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && 1097 !IS_ALIGNED(attr->ia_size, 1098 F2FS_BLK_TO_BYTES(fi->i_cluster_size))) 1099 return -EINVAL; 1100 /* 1101 * To prevent scattered pin block generation, we don't allow 1102 * smaller/equal size unaligned truncation for pinned file. 1103 * We only support overwrite IO to pinned file, so don't 1104 * care about larger size truncation. 
1105 */ 1106 if (f2fs_is_pinned_file(inode) && 1107 attr->ia_size <= i_size_read(inode) && 1108 !IS_ALIGNED(attr->ia_size, 1109 F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi)))) 1110 return -EINVAL; 1111 } 1112 1113 if (is_quota_modification(idmap, inode, attr)) { 1114 err = f2fs_dquot_initialize(inode); 1115 if (err) 1116 return err; 1117 } 1118 if (i_uid_needs_update(idmap, attr, inode) || 1119 i_gid_needs_update(idmap, attr, inode)) { 1120 struct f2fs_lock_context lc; 1121 1122 f2fs_lock_op(sbi, &lc); 1123 err = dquot_transfer(idmap, inode, attr); 1124 if (err) { 1125 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); 1126 f2fs_unlock_op(sbi, &lc); 1127 return err; 1128 } 1129 /* 1130 * update uid/gid under lock_op(), so that dquot and inode can 1131 * be updated atomically. 1132 */ 1133 i_uid_update(idmap, attr, inode); 1134 i_gid_update(idmap, attr, inode); 1135 f2fs_mark_inode_dirty_sync(inode, true); 1136 f2fs_unlock_op(sbi, &lc); 1137 } 1138 1139 if (attr->ia_valid & ATTR_SIZE) { 1140 loff_t old_size = i_size_read(inode); 1141 1142 if (attr->ia_size > MAX_INLINE_DATA(inode)) { 1143 /* 1144 * should convert inline inode before i_size_write to 1145 * keep smaller than inline_data size with inline flag. 1146 */ 1147 err = f2fs_convert_inline_inode(inode); 1148 if (err) 1149 return err; 1150 } 1151 1152 /* 1153 * wait for inflight dio, blocks should be removed after 1154 * IO completion. 1155 */ 1156 if (attr->ia_size < old_size) 1157 inode_dio_wait(inode); 1158 1159 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 1160 filemap_invalidate_lock(inode->i_mapping); 1161 1162 if (attr->ia_size > old_size) 1163 f2fs_zero_post_eof_page(inode, attr->ia_size, false); 1164 truncate_setsize(inode, attr->ia_size); 1165 1166 if (attr->ia_size <= old_size) 1167 err = f2fs_truncate(inode); 1168 /* 1169 * do not trim all blocks after i_size if target size is 1170 * larger than i_size. 
1171 */ 1172 filemap_invalidate_unlock(inode->i_mapping); 1173 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 1174 if (err) 1175 return err; 1176 1177 spin_lock(&fi->i_size_lock); 1178 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 1179 fi->last_disk_size = i_size_read(inode); 1180 spin_unlock(&fi->i_size_lock); 1181 } 1182 1183 __setattr_copy(idmap, inode, attr); 1184 1185 if (attr->ia_valid & ATTR_MODE) { 1186 err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode)); 1187 1188 if (is_inode_flag_set(inode, FI_ACL_MODE)) { 1189 if (!err) 1190 inode->i_mode = fi->i_acl_mode; 1191 clear_inode_flag(inode, FI_ACL_MODE); 1192 } 1193 } 1194 1195 /* file size may changed here */ 1196 f2fs_mark_inode_dirty_sync(inode, true); 1197 1198 /* inode change will produce dirty node pages flushed by checkpoint */ 1199 f2fs_balance_fs(sbi, true); 1200 1201 return err; 1202 } 1203 1204 const struct inode_operations f2fs_file_inode_operations = { 1205 .getattr = f2fs_getattr, 1206 .setattr = f2fs_setattr, 1207 .get_inode_acl = f2fs_get_acl, 1208 .set_acl = f2fs_set_acl, 1209 .listxattr = f2fs_listxattr, 1210 .fiemap = f2fs_fiemap, 1211 .fileattr_get = f2fs_fileattr_get, 1212 .fileattr_set = f2fs_fileattr_set, 1213 }; 1214 1215 static int fill_zero(struct inode *inode, pgoff_t index, 1216 loff_t start, loff_t len) 1217 { 1218 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1219 struct folio *folio; 1220 struct f2fs_lock_context lc; 1221 1222 if (!len) 1223 return 0; 1224 1225 f2fs_balance_fs(sbi, true); 1226 1227 f2fs_lock_op(sbi, &lc); 1228 folio = f2fs_get_new_data_folio(inode, NULL, index, false); 1229 f2fs_unlock_op(sbi, &lc); 1230 1231 if (IS_ERR(folio)) 1232 return PTR_ERR(folio); 1233 1234 f2fs_folio_wait_writeback(folio, DATA, true, true); 1235 folio_zero_range(folio, start, len); 1236 folio_mark_dirty(folio); 1237 f2fs_folio_put(folio, true); 1238 return 0; 1239 } 1240 1241 int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) 1242 { 1243 int err; 1244 1245 while (pg_start < pg_end) { 1246 struct dnode_of_data dn; 1247 pgoff_t end_offset, count; 1248 1249 set_new_dnode(&dn, inode, NULL, NULL, 0); 1250 err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); 1251 if (err) { 1252 if (err == -ENOENT) { 1253 pg_start = f2fs_get_next_page_offset(&dn, 1254 pg_start); 1255 continue; 1256 } 1257 return err; 1258 } 1259 1260 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 1261 count = min(end_offset - dn.ofs_in_node, pg_end - pg_start); 1262 1263 f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); 1264 1265 f2fs_truncate_data_blocks_range(&dn, count); 1266 f2fs_put_dnode(&dn); 1267 1268 pg_start += count; 1269 } 1270 return 0; 1271 } 1272 1273 static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len) 1274 { 1275 pgoff_t pg_start, pg_end; 1276 loff_t off_start, off_end; 1277 int ret; 1278 1279 ret = f2fs_convert_inline_inode(inode); 1280 if (ret) 1281 return ret; 1282 1283 f2fs_zero_post_eof_page(inode, offset + len, true); 1284 1285 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; 1286 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; 1287 1288 off_start = offset & (PAGE_SIZE - 1); 1289 off_end = (offset + len) & (PAGE_SIZE - 1); 1290 1291 if (pg_start == pg_end) { 1292 ret = fill_zero(inode, pg_start, off_start, 1293 off_end - off_start); 1294 if (ret) 1295 return ret; 1296 } else { 1297 if (off_start) { 1298 ret = fill_zero(inode, pg_start++, off_start, 1299 PAGE_SIZE - off_start); 1300 if (ret) 1301 return ret; 1302 } 
1303 if (off_end) { 1304 ret = fill_zero(inode, pg_end, 0, off_end); 1305 if (ret) 1306 return ret; 1307 } 1308 1309 if (pg_start < pg_end) { 1310 loff_t blk_start, blk_end; 1311 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1312 struct f2fs_lock_context lc; 1313 1314 f2fs_balance_fs(sbi, true); 1315 1316 blk_start = (loff_t)pg_start << PAGE_SHIFT; 1317 blk_end = (loff_t)pg_end << PAGE_SHIFT; 1318 1319 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1320 filemap_invalidate_lock(inode->i_mapping); 1321 1322 truncate_pagecache_range(inode, blk_start, blk_end - 1); 1323 1324 f2fs_lock_op(sbi, &lc); 1325 ret = f2fs_truncate_hole(inode, pg_start, pg_end); 1326 f2fs_unlock_op(sbi, &lc); 1327 1328 filemap_invalidate_unlock(inode->i_mapping); 1329 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1330 } 1331 } 1332 1333 return ret; 1334 } 1335 1336 static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, 1337 int *do_replace, pgoff_t off, pgoff_t len) 1338 { 1339 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1340 struct dnode_of_data dn; 1341 int ret, done, i; 1342 1343 next_dnode: 1344 set_new_dnode(&dn, inode, NULL, NULL, 0); 1345 ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); 1346 if (ret && ret != -ENOENT) { 1347 return ret; 1348 } else if (ret == -ENOENT) { 1349 if (dn.max_level == 0) 1350 return -ENOENT; 1351 done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - 1352 dn.ofs_in_node, len); 1353 blkaddr += done; 1354 do_replace += done; 1355 goto next; 1356 } 1357 1358 done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) - 1359 dn.ofs_in_node, len); 1360 for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { 1361 *blkaddr = f2fs_data_blkaddr(&dn); 1362 1363 if (__is_valid_data_blkaddr(*blkaddr) && 1364 !f2fs_is_valid_blkaddr(sbi, *blkaddr, 1365 DATA_GENERIC_ENHANCE)) { 1366 f2fs_put_dnode(&dn); 1367 return -EFSCORRUPTED; 1368 } 1369 1370 if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { 1371 1372 if (f2fs_lfs_mode(sbi)) { 1373 f2fs_put_dnode(&dn); 1374 return -EOPNOTSUPP; 1375 } 1376 1377 /* do not invalidate this block address */ 1378 f2fs_update_data_blkaddr(&dn, NULL_ADDR); 1379 *do_replace = 1; 1380 } 1381 } 1382 f2fs_put_dnode(&dn); 1383 next: 1384 len -= done; 1385 off += done; 1386 if (len) 1387 goto next_dnode; 1388 return 0; 1389 } 1390 1391 static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, 1392 int *do_replace, pgoff_t off, int len) 1393 { 1394 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1395 struct dnode_of_data dn; 1396 int ret, i; 1397 1398 for (i = 0; i < len; i++, do_replace++, blkaddr++) { 1399 if (*do_replace == 0) 1400 continue; 1401 1402 set_new_dnode(&dn, inode, NULL, NULL, 0); 1403 ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); 1404 if (ret) { 1405 dec_valid_block_count(sbi, inode, 1); 1406 f2fs_invalidate_blocks(sbi, *blkaddr, 1); 1407 } else { 1408 f2fs_update_data_blkaddr(&dn, *blkaddr); 1409 } 1410 f2fs_put_dnode(&dn); 1411 } 1412 return 0; 1413 } 1414 1415 static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, 1416 block_t *blkaddr, int *do_replace, 1417 pgoff_t src, pgoff_t dst, pgoff_t len, bool full) 1418 { 1419 struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode); 1420 pgoff_t i = 0; 1421 int ret; 1422 1423 while (i < len) { 1424 if (blkaddr[i] == NULL_ADDR && !full) { 1425 i++; 1426 continue; 1427 } 1428 1429 if (do_replace[i] || blkaddr[i] == NULL_ADDR) { 1430 struct dnode_of_data dn; 1431 struct node_info ni; 1432 size_t new_size; 1433 pgoff_t ilen; 1434 1435 
set_new_dnode(&dn, dst_inode, NULL, NULL, 0); 1436 ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE); 1437 if (ret) 1438 return ret; 1439 1440 ret = f2fs_get_node_info(sbi, dn.nid, &ni, false); 1441 if (ret) { 1442 f2fs_put_dnode(&dn); 1443 return ret; 1444 } 1445 1446 ilen = min((pgoff_t) 1447 ADDRS_PER_PAGE(dn.node_folio, dst_inode) - 1448 dn.ofs_in_node, len - i); 1449 do { 1450 dn.data_blkaddr = f2fs_data_blkaddr(&dn); 1451 f2fs_truncate_data_blocks_range(&dn, 1); 1452 1453 if (do_replace[i]) { 1454 f2fs_i_blocks_write(src_inode, 1455 1, false, false); 1456 f2fs_i_blocks_write(dst_inode, 1457 1, true, false); 1458 f2fs_replace_block(sbi, &dn, dn.data_blkaddr, 1459 blkaddr[i], ni.version, true, false); 1460 1461 do_replace[i] = 0; 1462 } 1463 dn.ofs_in_node++; 1464 i++; 1465 new_size = (loff_t)(dst + i) << PAGE_SHIFT; 1466 if (dst_inode->i_size < new_size) 1467 f2fs_i_size_write(dst_inode, new_size); 1468 } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR)); 1469 1470 f2fs_put_dnode(&dn); 1471 } else { 1472 struct folio *fsrc, *fdst; 1473 1474 fsrc = f2fs_get_lock_data_folio(src_inode, 1475 src + i, true); 1476 if (IS_ERR(fsrc)) 1477 return PTR_ERR(fsrc); 1478 fdst = f2fs_get_new_data_folio(dst_inode, NULL, dst + i, 1479 true); 1480 if (IS_ERR(fdst)) { 1481 f2fs_folio_put(fsrc, true); 1482 return PTR_ERR(fdst); 1483 } 1484 1485 f2fs_folio_wait_writeback(fdst, DATA, true, true); 1486 1487 memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE); 1488 folio_mark_dirty(fdst); 1489 folio_set_f2fs_gcing(fdst); 1490 f2fs_folio_put(fdst, true); 1491 f2fs_folio_put(fsrc, true); 1492 1493 ret = f2fs_truncate_hole(src_inode, 1494 src + i, src + i + 1); 1495 if (ret) 1496 return ret; 1497 i++; 1498 } 1499 } 1500 return 0; 1501 } 1502 1503 static int __exchange_data_block(struct inode *src_inode, 1504 struct inode *dst_inode, pgoff_t src, pgoff_t dst, 1505 pgoff_t len, bool full) 1506 { 1507 block_t *src_blkaddr; 1508 int *do_replace; 1509 pgoff_t olen; 1510 int ret; 1511 1512 while (len) { 1513 olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len); 1514 1515 src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), 1516 array_size(olen, sizeof(block_t)), 1517 GFP_NOFS); 1518 if (!src_blkaddr) 1519 return -ENOMEM; 1520 1521 do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), 1522 array_size(olen, sizeof(int)), 1523 GFP_NOFS); 1524 if (!do_replace) { 1525 kvfree(src_blkaddr); 1526 return -ENOMEM; 1527 } 1528 1529 ret = __read_out_blkaddrs(src_inode, src_blkaddr, 1530 do_replace, src, olen); 1531 if (ret) 1532 goto roll_back; 1533 1534 ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr, 1535 do_replace, src, dst, olen, full); 1536 if (ret) 1537 goto roll_back; 1538 1539 src += olen; 1540 dst += olen; 1541 len -= olen; 1542 1543 kvfree(src_blkaddr); 1544 kvfree(do_replace); 1545 } 1546 return 0; 1547 1548 roll_back: 1549 __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen); 1550 kvfree(src_blkaddr); 1551 kvfree(do_replace); 1552 return ret; 1553 } 1554 1555 static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) 1556 { 1557 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1558 struct f2fs_lock_context lc; 1559 pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 1560 pgoff_t start = offset >> PAGE_SHIFT; 1561 pgoff_t end = (offset + len) >> PAGE_SHIFT; 1562 int ret; 1563 1564 f2fs_balance_fs(sbi, true); 1565 1566 /* avoid gc operation during block exchange */ 1567 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1568 filemap_invalidate_lock(inode->i_mapping); 
1569 1570 f2fs_zero_post_eof_page(inode, offset + len, false); 1571 1572 f2fs_lock_op(sbi, &lc); 1573 f2fs_drop_extent_tree(inode); 1574 truncate_pagecache(inode, offset); 1575 ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); 1576 f2fs_unlock_op(sbi, &lc); 1577 1578 filemap_invalidate_unlock(inode->i_mapping); 1579 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1580 return ret; 1581 } 1582 1583 static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) 1584 { 1585 loff_t new_size; 1586 int ret; 1587 1588 if (offset + len >= i_size_read(inode)) 1589 return -EINVAL; 1590 1591 /* collapse range should be aligned to block size of f2fs. */ 1592 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) 1593 return -EINVAL; 1594 1595 ret = f2fs_convert_inline_inode(inode); 1596 if (ret) 1597 return ret; 1598 1599 /* write out all dirty pages from offset */ 1600 ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); 1601 if (ret) 1602 return ret; 1603 1604 ret = f2fs_do_collapse(inode, offset, len); 1605 if (ret) 1606 return ret; 1607 1608 /* write out all moved pages, if possible */ 1609 filemap_invalidate_lock(inode->i_mapping); 1610 filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); 1611 truncate_pagecache(inode, offset); 1612 1613 new_size = i_size_read(inode) - len; 1614 ret = f2fs_truncate_blocks(inode, new_size, true); 1615 filemap_invalidate_unlock(inode->i_mapping); 1616 if (!ret) 1617 f2fs_i_size_write(inode, new_size); 1618 return ret; 1619 } 1620 1621 static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, 1622 pgoff_t end) 1623 { 1624 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 1625 pgoff_t index = start; 1626 unsigned int ofs_in_node = dn->ofs_in_node; 1627 blkcnt_t count = 0; 1628 int ret; 1629 1630 for (; index < end; index++, dn->ofs_in_node++) { 1631 if (f2fs_data_blkaddr(dn) == NULL_ADDR) 1632 count++; 1633 } 1634 1635 dn->ofs_in_node = ofs_in_node; 1636 ret = f2fs_reserve_new_blocks(dn, count); 1637 if (ret) 1638 return ret; 1639 1640 dn->ofs_in_node = ofs_in_node; 1641 for (index = start; index < end; index++, dn->ofs_in_node++) { 1642 dn->data_blkaddr = f2fs_data_blkaddr(dn); 1643 /* 1644 * f2fs_reserve_new_blocks will not guarantee entire block 1645 * allocation. 
1646 */ 1647 if (dn->data_blkaddr == NULL_ADDR) { 1648 ret = -ENOSPC; 1649 break; 1650 } 1651 1652 if (dn->data_blkaddr == NEW_ADDR) 1653 continue; 1654 1655 if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr, 1656 DATA_GENERIC_ENHANCE)) { 1657 ret = -EFSCORRUPTED; 1658 break; 1659 } 1660 1661 f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1); 1662 f2fs_set_data_blkaddr(dn, NEW_ADDR); 1663 } 1664 1665 if (index > start) { 1666 f2fs_update_read_extent_cache_range(dn, start, 0, 1667 index - start); 1668 f2fs_update_age_extent_cache_range(dn, start, index - start); 1669 } 1670 1671 return ret; 1672 } 1673 1674 static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, 1675 int mode) 1676 { 1677 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1678 struct address_space *mapping = inode->i_mapping; 1679 pgoff_t index, pg_start, pg_end; 1680 loff_t new_size = i_size_read(inode); 1681 loff_t off_start, off_end; 1682 int ret = 0; 1683 1684 ret = inode_newsize_ok(inode, (len + offset)); 1685 if (ret) 1686 return ret; 1687 1688 ret = f2fs_convert_inline_inode(inode); 1689 if (ret) 1690 return ret; 1691 1692 ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); 1693 if (ret) 1694 return ret; 1695 1696 f2fs_zero_post_eof_page(inode, offset + len, true); 1697 1698 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; 1699 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; 1700 1701 off_start = offset & (PAGE_SIZE - 1); 1702 off_end = (offset + len) & (PAGE_SIZE - 1); 1703 1704 if (pg_start == pg_end) { 1705 ret = fill_zero(inode, pg_start, off_start, 1706 off_end - off_start); 1707 if (ret) 1708 return ret; 1709 1710 new_size = max_t(loff_t, new_size, offset + len); 1711 } else { 1712 if (off_start) { 1713 ret = fill_zero(inode, pg_start++, off_start, 1714 PAGE_SIZE - off_start); 1715 if (ret) 1716 return ret; 1717 1718 new_size = max_t(loff_t, new_size, 1719 (loff_t)pg_start << PAGE_SHIFT); 1720 } 1721 1722 for (index = pg_start; index < pg_end;) { 1723 struct dnode_of_data dn; 1724 struct f2fs_lock_context lc; 1725 unsigned int end_offset; 1726 pgoff_t end; 1727 1728 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1729 filemap_invalidate_lock(mapping); 1730 1731 truncate_pagecache_range(inode, 1732 (loff_t)index << PAGE_SHIFT, 1733 ((loff_t)pg_end << PAGE_SHIFT) - 1); 1734 1735 f2fs_lock_op(sbi, &lc); 1736 1737 set_new_dnode(&dn, inode, NULL, NULL, 0); 1738 ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); 1739 if (ret) { 1740 f2fs_unlock_op(sbi, &lc); 1741 filemap_invalidate_unlock(mapping); 1742 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1743 goto out; 1744 } 1745 1746 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 1747 end = min(pg_end, end_offset - dn.ofs_in_node + index); 1748 1749 ret = f2fs_do_zero_range(&dn, index, end); 1750 f2fs_put_dnode(&dn); 1751 1752 f2fs_unlock_op(sbi, &lc); 1753 filemap_invalidate_unlock(mapping); 1754 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1755 1756 f2fs_balance_fs(sbi, dn.node_changed); 1757 1758 if (ret) 1759 goto out; 1760 1761 index = end; 1762 new_size = max_t(loff_t, new_size, 1763 (loff_t)index << PAGE_SHIFT); 1764 } 1765 1766 if (off_end) { 1767 ret = fill_zero(inode, pg_end, 0, off_end); 1768 if (ret) 1769 goto out; 1770 1771 new_size = max_t(loff_t, new_size, offset + len); 1772 } 1773 } 1774 1775 out: 1776 if (new_size > i_size_read(inode)) { 1777 if (mode & FALLOC_FL_KEEP_SIZE) 1778 file_set_keep_isize(inode); 1779 else 1780 f2fs_i_size_write(inode, new_size); 1781 } 1782 return ret; 1783 
} 1784 1785 static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) 1786 { 1787 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1788 struct address_space *mapping = inode->i_mapping; 1789 pgoff_t nr, pg_start, pg_end, delta, idx; 1790 loff_t new_size; 1791 int ret = 0; 1792 1793 new_size = i_size_read(inode) + len; 1794 ret = inode_newsize_ok(inode, new_size); 1795 if (ret) 1796 return ret; 1797 1798 if (offset >= i_size_read(inode)) 1799 return -EINVAL; 1800 1801 /* insert range should be aligned to block size of f2fs. */ 1802 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) 1803 return -EINVAL; 1804 1805 ret = f2fs_convert_inline_inode(inode); 1806 if (ret) 1807 return ret; 1808 1809 f2fs_balance_fs(sbi, true); 1810 1811 filemap_invalidate_lock(mapping); 1812 ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); 1813 filemap_invalidate_unlock(mapping); 1814 if (ret) 1815 return ret; 1816 1817 /* write out all dirty pages from offset */ 1818 ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); 1819 if (ret) 1820 return ret; 1821 1822 pg_start = offset >> PAGE_SHIFT; 1823 pg_end = (offset + len) >> PAGE_SHIFT; 1824 delta = pg_end - pg_start; 1825 idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 1826 1827 /* avoid gc operation during block exchange */ 1828 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1829 filemap_invalidate_lock(mapping); 1830 1831 f2fs_zero_post_eof_page(inode, offset + len, false); 1832 truncate_pagecache(inode, offset); 1833 1834 while (!ret && idx > pg_start) { 1835 struct f2fs_lock_context lc; 1836 1837 nr = idx - pg_start; 1838 if (nr > delta) 1839 nr = delta; 1840 idx -= nr; 1841 1842 f2fs_lock_op(sbi, &lc); 1843 f2fs_drop_extent_tree(inode); 1844 1845 ret = __exchange_data_block(inode, inode, idx, 1846 idx + delta, nr, false); 1847 f2fs_unlock_op(sbi, &lc); 1848 } 1849 filemap_invalidate_unlock(mapping); 1850 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1851 if (ret) 1852 return ret; 1853 1854 /* write out all moved pages, if possible */ 1855 filemap_invalidate_lock(mapping); 1856 ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); 1857 truncate_pagecache(inode, offset); 1858 filemap_invalidate_unlock(mapping); 1859 1860 if (!ret) 1861 f2fs_i_size_write(inode, new_size); 1862 return ret; 1863 } 1864 1865 static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, 1866 loff_t len, int mode) 1867 { 1868 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1869 struct f2fs_map_blocks map = { .m_next_pgofs = NULL, 1870 .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, 1871 .m_may_create = true }; 1872 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, 1873 .init_gc_type = FG_GC, 1874 .should_migrate_blocks = false, 1875 .err_gc_skipped = true, 1876 .nr_free_secs = 0 }; 1877 pgoff_t pg_start, pg_end; 1878 loff_t new_size; 1879 loff_t off_end; 1880 block_t expanded = 0; 1881 int err; 1882 1883 err = inode_newsize_ok(inode, (len + offset)); 1884 if (err) 1885 return err; 1886 1887 err = f2fs_convert_inline_inode(inode); 1888 if (err) 1889 return err; 1890 1891 f2fs_zero_post_eof_page(inode, offset + len, true); 1892 1893 f2fs_balance_fs(sbi, true); 1894 1895 pg_start = ((unsigned long long)offset) >> PAGE_SHIFT; 1896 pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT; 1897 off_end = (offset + len) & (PAGE_SIZE - 1); 1898 1899 map.m_lblk = pg_start; 1900 map.m_len = pg_end - pg_start; 1901 if (off_end) 1902 map.m_len++; 1903 1904 if (!map.m_len) 1905 return 0; 1906 1907 if 
(f2fs_is_pinned_file(inode)) { 1908 block_t sec_blks = CAP_BLKS_PER_SEC(sbi); 1909 block_t sec_len = roundup(map.m_len, sec_blks); 1910 1911 map.m_len = sec_blks; 1912 next_alloc: 1913 f2fs_down_write(&sbi->pin_sem); 1914 1915 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 1916 if (has_not_enough_free_secs(sbi, 0, 0)) { 1917 f2fs_up_write(&sbi->pin_sem); 1918 err = -ENOSPC; 1919 f2fs_warn_ratelimited(sbi, 1920 "ino:%lu, start:%lu, end:%lu, need to trigger GC to " 1921 "reclaim enough free segment when checkpoint is enabled", 1922 inode->i_ino, pg_start, pg_end); 1923 goto out_err; 1924 } 1925 } 1926 1927 if (has_not_enough_free_secs(sbi, 0, 1928 sbi->reserved_pin_section)) { 1929 f2fs_down_write_trace(&sbi->gc_lock, &gc_control.lc); 1930 stat_inc_gc_call_count(sbi, FOREGROUND); 1931 err = f2fs_gc(sbi, &gc_control); 1932 if (err && err != -ENODATA) { 1933 f2fs_up_write(&sbi->pin_sem); 1934 goto out_err; 1935 } 1936 } 1937 1938 err = f2fs_allocate_pinning_section(sbi); 1939 if (err) { 1940 f2fs_up_write(&sbi->pin_sem); 1941 goto out_err; 1942 } 1943 1944 map.m_seg_type = CURSEG_COLD_DATA_PINNED; 1945 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); 1946 file_dont_truncate(inode); 1947 1948 f2fs_up_write(&sbi->pin_sem); 1949 1950 expanded += map.m_len; 1951 sec_len -= map.m_len; 1952 map.m_lblk += map.m_len; 1953 if (!err && sec_len) 1954 goto next_alloc; 1955 1956 map.m_len = expanded; 1957 } else { 1958 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO); 1959 expanded = map.m_len; 1960 } 1961 out_err: 1962 if (err) { 1963 pgoff_t last_off; 1964 1965 if (!expanded) 1966 return err; 1967 1968 last_off = pg_start + expanded - 1; 1969 1970 /* update new size to the failed position */ 1971 new_size = (last_off == pg_end) ? offset + len : 1972 (loff_t)(last_off + 1) << PAGE_SHIFT; 1973 } else { 1974 new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; 1975 } 1976 1977 if (new_size > i_size_read(inode)) { 1978 if (mode & FALLOC_FL_KEEP_SIZE) 1979 file_set_keep_isize(inode); 1980 else 1981 f2fs_i_size_write(inode, new_size); 1982 } 1983 1984 return err; 1985 } 1986 1987 static long f2fs_fallocate(struct file *file, int mode, 1988 loff_t offset, loff_t len) 1989 { 1990 struct inode *inode = file_inode(file); 1991 long ret = 0; 1992 1993 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 1994 return -EIO; 1995 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) 1996 return -ENOSPC; 1997 if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) 1998 return -EOPNOTSUPP; 1999 2000 /* f2fs only support ->fallocate for regular file */ 2001 if (!S_ISREG(inode->i_mode)) 2002 return -EINVAL; 2003 2004 if (IS_ENCRYPTED(inode) && 2005 (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) 2006 return -EOPNOTSUPP; 2007 2008 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 2009 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | 2010 FALLOC_FL_INSERT_RANGE)) 2011 return -EOPNOTSUPP; 2012 2013 inode_lock(inode); 2014 2015 /* 2016 * Pinned file should not support partial truncation since the block 2017 * can be used by applications. 2018 */ 2019 if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && 2020 (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | 2021 FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) { 2022 ret = -EOPNOTSUPP; 2023 goto out; 2024 } 2025 2026 ret = file_modified(file); 2027 if (ret) 2028 goto out; 2029 2030 /* 2031 * wait for inflight dio, blocks should be removed after IO 2032 * completion. 
2033 */ 2034 inode_dio_wait(inode); 2035 2036 if (mode & FALLOC_FL_PUNCH_HOLE) { 2037 if (offset >= inode->i_size) 2038 goto out; 2039 2040 ret = f2fs_punch_hole(inode, offset, len); 2041 } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { 2042 ret = f2fs_collapse_range(inode, offset, len); 2043 } else if (mode & FALLOC_FL_ZERO_RANGE) { 2044 ret = f2fs_zero_range(inode, offset, len, mode); 2045 } else if (mode & FALLOC_FL_INSERT_RANGE) { 2046 ret = f2fs_insert_range(inode, offset, len); 2047 } else { 2048 ret = f2fs_expand_inode_data(inode, offset, len, mode); 2049 } 2050 2051 if (!ret) { 2052 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 2053 f2fs_mark_inode_dirty_sync(inode, false); 2054 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2055 } 2056 2057 out: 2058 inode_unlock(inode); 2059 2060 trace_f2fs_fallocate(inode, mode, offset, len, ret); 2061 return ret; 2062 } 2063 2064 static int f2fs_release_file(struct inode *inode, struct file *filp) 2065 { 2066 if (atomic_dec_and_test(&F2FS_I(inode)->open_count)) 2067 f2fs_remove_donate_inode(inode); 2068 2069 /* 2070 * f2fs_release_file is called at every close calls. So we should 2071 * not drop any inmemory pages by close called by other process. 2072 */ 2073 if (!(filp->f_mode & FMODE_WRITE) || 2074 atomic_read(&inode->i_writecount) != 1) 2075 return 0; 2076 2077 inode_lock(inode); 2078 f2fs_abort_atomic_write(inode, true); 2079 inode_unlock(inode); 2080 2081 return 0; 2082 } 2083 2084 static int f2fs_file_flush(struct file *file, fl_owner_t id) 2085 { 2086 struct inode *inode = file_inode(file); 2087 2088 /* 2089 * If the process doing a transaction is crashed, we should do 2090 * roll-back. Otherwise, other reader/write can see corrupted database 2091 * until all the writers close its file. Since this should be done 2092 * before dropping file lock, it needs to do in ->flush. 2093 */ 2094 if (F2FS_I(inode)->atomic_write_task == current && 2095 (current->flags & PF_EXITING)) { 2096 inode_lock(inode); 2097 f2fs_abort_atomic_write(inode, true); 2098 inode_unlock(inode); 2099 } 2100 2101 return 0; 2102 } 2103 2104 static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) 2105 { 2106 struct f2fs_inode_info *fi = F2FS_I(inode); 2107 u32 masked_flags = fi->i_flags & mask; 2108 2109 /* mask can be shrunk by flags_valid selector */ 2110 iflags &= mask; 2111 2112 /* Is it quota file? 
Do not allow user to mess with it */ 2113 if (IS_NOQUOTA(inode)) 2114 return -EPERM; 2115 2116 if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) { 2117 if (!f2fs_sb_has_casefold(F2FS_I_SB(inode))) 2118 return -EOPNOTSUPP; 2119 if (!f2fs_empty_dir(inode)) 2120 return -ENOTEMPTY; 2121 } 2122 2123 if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) { 2124 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 2125 return -EOPNOTSUPP; 2126 if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL)) 2127 return -EINVAL; 2128 } 2129 2130 if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { 2131 if (masked_flags & F2FS_COMPR_FL) { 2132 if (!f2fs_disable_compressed_file(inode)) 2133 return -EINVAL; 2134 } else { 2135 /* try to convert inline_data to support compression */ 2136 int err = f2fs_convert_inline_inode(inode); 2137 if (err) 2138 return err; 2139 2140 f2fs_down_write(&fi->i_sem); 2141 if (!f2fs_may_compress(inode) || 2142 atomic_read(&fi->writeback) || 2143 (S_ISREG(inode->i_mode) && 2144 F2FS_HAS_BLOCKS(inode))) { 2145 f2fs_up_write(&fi->i_sem); 2146 return -EINVAL; 2147 } 2148 err = set_compress_context(inode); 2149 f2fs_up_write(&fi->i_sem); 2150 2151 if (err) 2152 return err; 2153 } 2154 } 2155 2156 fi->i_flags = iflags | (fi->i_flags & ~mask); 2157 f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) && 2158 (fi->i_flags & F2FS_NOCOMP_FL)); 2159 2160 if (fi->i_flags & F2FS_PROJINHERIT_FL) 2161 set_inode_flag(inode, FI_PROJ_INHERIT); 2162 else 2163 clear_inode_flag(inode, FI_PROJ_INHERIT); 2164 2165 inode_set_ctime_current(inode); 2166 f2fs_set_inode_flags(inode); 2167 f2fs_mark_inode_dirty_sync(inode, true); 2168 return 0; 2169 } 2170 2171 /* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */ 2172 2173 /* 2174 * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry 2175 * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to 2176 * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add 2177 * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL. 2178 * 2179 * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and 2180 * FS_IOC_FSSETXATTR is done by the VFS. 
2181 */ 2182 2183 static const struct { 2184 u32 iflag; 2185 u32 fsflag; 2186 } f2fs_fsflags_map[] = { 2187 { F2FS_COMPR_FL, FS_COMPR_FL }, 2188 { F2FS_SYNC_FL, FS_SYNC_FL }, 2189 { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, 2190 { F2FS_APPEND_FL, FS_APPEND_FL }, 2191 { F2FS_NODUMP_FL, FS_NODUMP_FL }, 2192 { F2FS_NOATIME_FL, FS_NOATIME_FL }, 2193 { F2FS_NOCOMP_FL, FS_NOCOMP_FL }, 2194 { F2FS_INDEX_FL, FS_INDEX_FL }, 2195 { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, 2196 { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, 2197 { F2FS_CASEFOLD_FL, FS_CASEFOLD_FL }, 2198 }; 2199 2200 #define F2FS_GETTABLE_FS_FL ( \ 2201 FS_COMPR_FL | \ 2202 FS_SYNC_FL | \ 2203 FS_IMMUTABLE_FL | \ 2204 FS_APPEND_FL | \ 2205 FS_NODUMP_FL | \ 2206 FS_NOATIME_FL | \ 2207 FS_NOCOMP_FL | \ 2208 FS_INDEX_FL | \ 2209 FS_DIRSYNC_FL | \ 2210 FS_PROJINHERIT_FL | \ 2211 FS_ENCRYPT_FL | \ 2212 FS_INLINE_DATA_FL | \ 2213 FS_NOCOW_FL | \ 2214 FS_VERITY_FL | \ 2215 FS_CASEFOLD_FL) 2216 2217 #define F2FS_SETTABLE_FS_FL ( \ 2218 FS_COMPR_FL | \ 2219 FS_SYNC_FL | \ 2220 FS_IMMUTABLE_FL | \ 2221 FS_APPEND_FL | \ 2222 FS_NODUMP_FL | \ 2223 FS_NOATIME_FL | \ 2224 FS_NOCOMP_FL | \ 2225 FS_DIRSYNC_FL | \ 2226 FS_PROJINHERIT_FL | \ 2227 FS_CASEFOLD_FL) 2228 2229 /* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */ 2230 static inline u32 f2fs_iflags_to_fsflags(u32 iflags) 2231 { 2232 u32 fsflags = 0; 2233 int i; 2234 2235 for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) 2236 if (iflags & f2fs_fsflags_map[i].iflag) 2237 fsflags |= f2fs_fsflags_map[i].fsflag; 2238 2239 return fsflags; 2240 } 2241 2242 /* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */ 2243 static inline u32 f2fs_fsflags_to_iflags(u32 fsflags) 2244 { 2245 u32 iflags = 0; 2246 int i; 2247 2248 for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) 2249 if (fsflags & f2fs_fsflags_map[i].fsflag) 2250 iflags |= f2fs_fsflags_map[i].iflag; 2251 2252 return iflags; 2253 } 2254 2255 static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) 2256 { 2257 struct inode *inode = file_inode(filp); 2258 2259 return put_user(inode->i_generation, (int __user *)arg); 2260 } 2261 2262 static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) 2263 { 2264 struct inode *inode = file_inode(filp); 2265 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2266 struct f2fs_inode_info *fi = F2FS_I(inode); 2267 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2268 loff_t isize; 2269 int ret; 2270 2271 if (!(filp->f_mode & FMODE_WRITE)) 2272 return -EBADF; 2273 2274 if (!inode_owner_or_capable(idmap, inode)) 2275 return -EACCES; 2276 2277 if (!S_ISREG(inode->i_mode)) 2278 return -EINVAL; 2279 2280 if (filp->f_flags & O_DIRECT) 2281 return -EINVAL; 2282 2283 ret = mnt_want_write_file(filp); 2284 if (ret) 2285 return ret; 2286 2287 inode_lock(inode); 2288 2289 if (!f2fs_disable_compressed_file(inode) || 2290 f2fs_is_pinned_file(inode)) { 2291 ret = -EINVAL; 2292 goto out; 2293 } 2294 2295 if (f2fs_is_atomic_file(inode)) 2296 goto out; 2297 2298 ret = f2fs_convert_inline_inode(inode); 2299 if (ret) 2300 goto out; 2301 2302 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 2303 f2fs_down_write(&fi->i_gc_rwsem[READ]); 2304 2305 /* 2306 * Should wait end_io to count F2FS_WB_CP_DATA correctly by 2307 * f2fs_is_atomic_file. 
2308 */ 2309 if (get_dirty_pages(inode)) 2310 f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u", 2311 inode->i_ino, get_dirty_pages(inode)); 2312 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 2313 if (ret) 2314 goto out_unlock; 2315 2316 /* Check if the inode already has a COW inode */ 2317 if (fi->cow_inode == NULL) { 2318 /* Create a COW inode for atomic write */ 2319 struct dentry *dentry = file_dentry(filp); 2320 struct inode *dir = d_inode(dentry->d_parent); 2321 2322 ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode); 2323 if (ret) 2324 goto out_unlock; 2325 2326 set_inode_flag(fi->cow_inode, FI_COW_FILE); 2327 clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); 2328 2329 /* Set the COW inode's atomic_inode to the atomic inode */ 2330 F2FS_I(fi->cow_inode)->atomic_inode = inode; 2331 } else { 2332 /* Reuse the already created COW inode */ 2333 f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode)); 2334 2335 invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1); 2336 2337 ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); 2338 if (ret) 2339 goto out_unlock; 2340 } 2341 2342 f2fs_write_inode(inode, NULL); 2343 2344 stat_inc_atomic_inode(inode); 2345 2346 set_inode_flag(inode, FI_ATOMIC_FILE); 2347 2348 isize = i_size_read(inode); 2349 fi->original_i_size = isize; 2350 if (truncate) { 2351 set_inode_flag(inode, FI_ATOMIC_REPLACE); 2352 truncate_inode_pages_final(inode->i_mapping); 2353 f2fs_i_size_write(inode, 0); 2354 isize = 0; 2355 } 2356 f2fs_i_size_write(fi->cow_inode, isize); 2357 2358 out_unlock: 2359 f2fs_up_write(&fi->i_gc_rwsem[READ]); 2360 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 2361 if (ret) 2362 goto out; 2363 2364 f2fs_update_time(sbi, REQ_TIME); 2365 fi->atomic_write_task = current; 2366 stat_update_max_atomic_write(inode); 2367 fi->atomic_write_cnt = 0; 2368 out: 2369 inode_unlock(inode); 2370 mnt_drop_write_file(filp); 2371 return ret; 2372 } 2373 2374 static int f2fs_ioc_commit_atomic_write(struct file *filp) 2375 { 2376 struct inode *inode = file_inode(filp); 2377 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2378 int ret; 2379 2380 if (!(filp->f_mode & FMODE_WRITE)) 2381 return -EBADF; 2382 2383 if (!inode_owner_or_capable(idmap, inode)) 2384 return -EACCES; 2385 2386 ret = mnt_want_write_file(filp); 2387 if (ret) 2388 return ret; 2389 2390 f2fs_balance_fs(F2FS_I_SB(inode), true); 2391 2392 inode_lock(inode); 2393 2394 if (f2fs_is_atomic_file(inode)) { 2395 ret = f2fs_commit_atomic_write(inode); 2396 if (!ret) 2397 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); 2398 2399 f2fs_abort_atomic_write(inode, ret); 2400 } else { 2401 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); 2402 } 2403 2404 inode_unlock(inode); 2405 mnt_drop_write_file(filp); 2406 return ret; 2407 } 2408 2409 static int f2fs_ioc_abort_atomic_write(struct file *filp) 2410 { 2411 struct inode *inode = file_inode(filp); 2412 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2413 int ret; 2414 2415 if (!(filp->f_mode & FMODE_WRITE)) 2416 return -EBADF; 2417 2418 if (!inode_owner_or_capable(idmap, inode)) 2419 return -EACCES; 2420 2421 ret = mnt_want_write_file(filp); 2422 if (ret) 2423 return ret; 2424 2425 inode_lock(inode); 2426 2427 f2fs_abort_atomic_write(inode, true); 2428 2429 inode_unlock(inode); 2430 2431 mnt_drop_write_file(filp); 2432 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2433 return ret; 2434 } 2435 2436 int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, 2437 bool readonly, bool need_lock) 2438 { 2439 struct 
super_block *sb = sbi->sb; 2440 int ret = 0; 2441 2442 switch (flag) { 2443 case F2FS_GOING_DOWN_FULLSYNC: 2444 ret = bdev_freeze(sb->s_bdev); 2445 if (ret) 2446 goto out; 2447 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2448 bdev_thaw(sb->s_bdev); 2449 break; 2450 case F2FS_GOING_DOWN_METASYNC: 2451 /* do checkpoint only */ 2452 ret = f2fs_sync_fs(sb, 1); 2453 if (ret) { 2454 if (ret == -EIO) 2455 ret = 0; 2456 goto out; 2457 } 2458 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2459 break; 2460 case F2FS_GOING_DOWN_NOSYNC: 2461 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2462 break; 2463 case F2FS_GOING_DOWN_METAFLUSH: 2464 f2fs_sync_meta_pages(sbi, LONG_MAX, FS_META_IO); 2465 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2466 break; 2467 case F2FS_GOING_DOWN_NEED_FSCK: 2468 set_sbi_flag(sbi, SBI_NEED_FSCK); 2469 set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); 2470 set_sbi_flag(sbi, SBI_IS_DIRTY); 2471 /* do checkpoint only */ 2472 ret = f2fs_sync_fs(sb, 1); 2473 if (ret == -EIO) 2474 ret = 0; 2475 goto out; 2476 default: 2477 ret = -EINVAL; 2478 goto out; 2479 } 2480 2481 if (readonly) 2482 goto out; 2483 2484 /* 2485 * grab sb->s_umount to avoid racing w/ remount() and other shutdown 2486 * paths. 2487 */ 2488 if (need_lock) 2489 down_write(&sbi->sb->s_umount); 2490 2491 f2fs_stop_gc_thread(sbi); 2492 f2fs_stop_discard_thread(sbi); 2493 2494 f2fs_drop_discard_cmd(sbi); 2495 clear_opt(sbi, DISCARD); 2496 2497 if (need_lock) 2498 up_write(&sbi->sb->s_umount); 2499 2500 f2fs_update_time(sbi, REQ_TIME); 2501 out: 2502 2503 trace_f2fs_shutdown(sbi, flag, ret); 2504 2505 return ret; 2506 } 2507 2508 static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) 2509 { 2510 struct inode *inode = file_inode(filp); 2511 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2512 __u32 in; 2513 int ret; 2514 bool need_drop = false, readonly = false; 2515 2516 if (!capable(CAP_SYS_ADMIN)) 2517 return -EPERM; 2518 2519 if (get_user(in, (__u32 __user *)arg)) 2520 return -EFAULT; 2521 2522 if (in != F2FS_GOING_DOWN_FULLSYNC) { 2523 ret = mnt_want_write_file(filp); 2524 if (ret) { 2525 if (ret != -EROFS) 2526 return ret; 2527 2528 /* fallback to nosync shutdown for readonly fs */ 2529 in = F2FS_GOING_DOWN_NOSYNC; 2530 readonly = true; 2531 } else { 2532 need_drop = true; 2533 } 2534 } 2535 2536 ret = f2fs_do_shutdown(sbi, in, readonly, true); 2537 2538 if (need_drop) 2539 mnt_drop_write_file(filp); 2540 2541 return ret; 2542 } 2543 2544 static int f2fs_keep_noreuse_range(struct inode *inode, 2545 loff_t offset, loff_t len) 2546 { 2547 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2548 u64 max_bytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 2549 u64 start, end; 2550 int ret = 0; 2551 2552 if (!S_ISREG(inode->i_mode)) 2553 return 0; 2554 2555 if (offset >= max_bytes || len > max_bytes || 2556 (offset + len) > max_bytes) 2557 return 0; 2558 2559 start = offset >> PAGE_SHIFT; 2560 end = DIV_ROUND_UP(offset + len, PAGE_SIZE); 2561 2562 inode_lock(inode); 2563 if (f2fs_is_atomic_file(inode)) { 2564 inode_unlock(inode); 2565 return 0; 2566 } 2567 2568 spin_lock(&sbi->inode_lock[DONATE_INODE]); 2569 /* let's remove the range, if len = 0 */ 2570 if (!len) { 2571 if (!list_empty(&F2FS_I(inode)->gdonate_list)) { 2572 list_del_init(&F2FS_I(inode)->gdonate_list); 2573 sbi->donate_files--; 2574 if (is_inode_flag_set(inode, FI_DONATE_FINISHED)) 2575 ret = -EALREADY; 2576 else 2577 set_inode_flag(inode, FI_DONATE_FINISHED); 2578 } else 2579 ret = -ENOENT; 2580 } else { 
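/* non-zero len: add the inode to the donation list (or move it to the tail) and record the new donation range below */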
2581 if (list_empty(&F2FS_I(inode)->gdonate_list)) { 2582 list_add_tail(&F2FS_I(inode)->gdonate_list, 2583 &sbi->inode_list[DONATE_INODE]); 2584 sbi->donate_files++; 2585 } else { 2586 list_move_tail(&F2FS_I(inode)->gdonate_list, 2587 &sbi->inode_list[DONATE_INODE]); 2588 } 2589 F2FS_I(inode)->donate_start = start; 2590 F2FS_I(inode)->donate_end = end - 1; 2591 clear_inode_flag(inode, FI_DONATE_FINISHED); 2592 } 2593 spin_unlock(&sbi->inode_lock[DONATE_INODE]); 2594 inode_unlock(inode); 2595 2596 return ret; 2597 } 2598 2599 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) 2600 { 2601 struct inode *inode = file_inode(filp); 2602 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2603 struct fstrim_range range; 2604 int ret; 2605 2606 if (!capable(CAP_SYS_ADMIN)) 2607 return -EPERM; 2608 2609 if (!f2fs_hw_support_discard(sbi)) 2610 return -EOPNOTSUPP; 2611 2612 if (copy_from_user(&range, (struct fstrim_range __user *)arg, 2613 sizeof(range))) 2614 return -EFAULT; 2615 2616 ret = mnt_want_write_file(filp); 2617 if (ret) 2618 return ret; 2619 2620 range.minlen = max_t(unsigned int, range.minlen, 2621 f2fs_hw_discard_granularity(sbi)); 2622 ret = f2fs_trim_fs(sbi, &range); 2623 mnt_drop_write_file(filp); 2624 if (ret < 0) 2625 return ret; 2626 2627 if (copy_to_user((struct fstrim_range __user *)arg, &range, 2628 sizeof(range))) 2629 return -EFAULT; 2630 f2fs_update_time(sbi, REQ_TIME); 2631 return 0; 2632 } 2633 2634 static bool uuid_is_nonzero(__u8 u[16]) 2635 { 2636 int i; 2637 2638 for (i = 0; i < 16; i++) 2639 if (u[i]) 2640 return true; 2641 return false; 2642 } 2643 2644 static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) 2645 { 2646 struct inode *inode = file_inode(filp); 2647 int ret; 2648 2649 if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode))) 2650 return -EOPNOTSUPP; 2651 2652 ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg); 2653 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2654 return ret; 2655 } 2656 2657 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) 2658 { 2659 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2660 return -EOPNOTSUPP; 2661 return fscrypt_ioctl_get_policy(filp, (void __user *)arg); 2662 } 2663 2664 static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) 2665 { 2666 struct inode *inode = file_inode(filp); 2667 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2668 u8 encrypt_pw_salt[16]; 2669 int err; 2670 2671 if (!f2fs_sb_has_encrypt(sbi)) 2672 return -EOPNOTSUPP; 2673 2674 err = mnt_want_write_file(filp); 2675 if (err) 2676 return err; 2677 2678 f2fs_down_write(&sbi->sb_lock); 2679 2680 if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) 2681 goto got_it; 2682 2683 /* update superblock with uuid */ 2684 generate_random_uuid(sbi->raw_super->encrypt_pw_salt); 2685 2686 err = f2fs_commit_super(sbi, false); 2687 if (err) { 2688 /* undo new data */ 2689 memset(sbi->raw_super->encrypt_pw_salt, 0, 16); 2690 goto out_err; 2691 } 2692 got_it: 2693 memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16); 2694 out_err: 2695 f2fs_up_write(&sbi->sb_lock); 2696 mnt_drop_write_file(filp); 2697 2698 if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16)) 2699 err = -EFAULT; 2700 2701 return err; 2702 } 2703 2704 static int f2fs_ioc_get_encryption_policy_ex(struct file *filp, 2705 unsigned long arg) 2706 { 2707 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2708 return -EOPNOTSUPP; 2709 2710 return fscrypt_ioctl_get_policy_ex(filp, (void __user 
*)arg); 2711 } 2712 2713 static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg) 2714 { 2715 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2716 return -EOPNOTSUPP; 2717 2718 return fscrypt_ioctl_add_key(filp, (void __user *)arg); 2719 } 2720 2721 static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg) 2722 { 2723 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2724 return -EOPNOTSUPP; 2725 2726 return fscrypt_ioctl_remove_key(filp, (void __user *)arg); 2727 } 2728 2729 static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp, 2730 unsigned long arg) 2731 { 2732 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2733 return -EOPNOTSUPP; 2734 2735 return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg); 2736 } 2737 2738 static int f2fs_ioc_get_encryption_key_status(struct file *filp, 2739 unsigned long arg) 2740 { 2741 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2742 return -EOPNOTSUPP; 2743 2744 return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); 2745 } 2746 2747 static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg) 2748 { 2749 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2750 return -EOPNOTSUPP; 2751 2752 return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); 2753 } 2754 2755 static int f2fs_ioc_gc(struct file *filp, unsigned long arg) 2756 { 2757 struct inode *inode = file_inode(filp); 2758 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2759 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, 2760 .no_bg_gc = false, 2761 .should_migrate_blocks = false, 2762 .nr_free_secs = 0 }; 2763 __u32 sync; 2764 int ret; 2765 2766 if (!capable(CAP_SYS_ADMIN)) 2767 return -EPERM; 2768 2769 if (get_user(sync, (__u32 __user *)arg)) 2770 return -EFAULT; 2771 2772 if (f2fs_readonly(sbi->sb)) 2773 return -EROFS; 2774 2775 ret = mnt_want_write_file(filp); 2776 if (ret) 2777 return ret; 2778 2779 if (!sync) { 2780 if (!f2fs_down_write_trylock_trace(&sbi->gc_lock, 2781 &gc_control.lc)) { 2782 ret = -EBUSY; 2783 goto out; 2784 } 2785 } else { 2786 f2fs_down_write_trace(&sbi->gc_lock, &gc_control.lc); 2787 } 2788 2789 gc_control.init_gc_type = sync ? FG_GC : BG_GC; 2790 gc_control.err_gc_skipped = sync; 2791 stat_inc_gc_call_count(sbi, FOREGROUND); 2792 ret = f2fs_gc(sbi, &gc_control); 2793 out: 2794 mnt_drop_write_file(filp); 2795 return ret; 2796 } 2797 2798 static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range) 2799 { 2800 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); 2801 struct f2fs_gc_control gc_control = { 2802 .init_gc_type = range->sync ? 
FG_GC : BG_GC, 2803 .no_bg_gc = false, 2804 .should_migrate_blocks = false, 2805 .err_gc_skipped = range->sync, 2806 .nr_free_secs = 0 }; 2807 u64 end; 2808 int ret; 2809 2810 if (!capable(CAP_SYS_ADMIN)) 2811 return -EPERM; 2812 if (f2fs_readonly(sbi->sb)) 2813 return -EROFS; 2814 2815 end = range->start + range->len; 2816 if (end < range->start || range->start < MAIN_BLKADDR(sbi) || 2817 end >= MAX_BLKADDR(sbi)) 2818 return -EINVAL; 2819 2820 ret = mnt_want_write_file(filp); 2821 if (ret) 2822 return ret; 2823 2824 do_more: 2825 if (!range->sync) { 2826 if (!f2fs_down_write_trylock_trace(&sbi->gc_lock, &gc_control.lc)) { 2827 ret = -EBUSY; 2828 goto out; 2829 } 2830 } else { 2831 f2fs_down_write_trace(&sbi->gc_lock, &gc_control.lc); 2832 } 2833 2834 gc_control.victim_segno = GET_SEGNO(sbi, range->start); 2835 stat_inc_gc_call_count(sbi, FOREGROUND); 2836 ret = f2fs_gc(sbi, &gc_control); 2837 if (ret) { 2838 if (ret == -EBUSY) 2839 ret = -EAGAIN; 2840 goto out; 2841 } 2842 range->start += CAP_BLKS_PER_SEC(sbi); 2843 if (range->start <= end) 2844 goto do_more; 2845 out: 2846 mnt_drop_write_file(filp); 2847 return ret; 2848 } 2849 2850 static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) 2851 { 2852 struct f2fs_gc_range range; 2853 2854 if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, 2855 sizeof(range))) 2856 return -EFAULT; 2857 return __f2fs_ioc_gc_range(filp, &range); 2858 } 2859 2860 static int f2fs_ioc_write_checkpoint(struct file *filp) 2861 { 2862 struct inode *inode = file_inode(filp); 2863 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2864 int ret; 2865 2866 if (!capable(CAP_SYS_ADMIN)) 2867 return -EPERM; 2868 2869 if (f2fs_readonly(sbi->sb)) 2870 return -EROFS; 2871 2872 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 2873 f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled."); 2874 return -EINVAL; 2875 } 2876 2877 ret = mnt_want_write_file(filp); 2878 if (ret) 2879 return ret; 2880 2881 ret = f2fs_sync_fs(sbi->sb, 1); 2882 2883 mnt_drop_write_file(filp); 2884 return ret; 2885 } 2886 2887 static int f2fs_defragment_range(struct f2fs_sb_info *sbi, 2888 struct file *filp, 2889 struct f2fs_defragment *range) 2890 { 2891 struct inode *inode = file_inode(filp); 2892 struct f2fs_map_blocks map = { .m_next_extent = NULL, 2893 .m_seg_type = NO_CHECK_TYPE, 2894 .m_may_create = false }; 2895 struct extent_info ei = {}; 2896 pgoff_t pg_start, pg_end, next_pgofs; 2897 unsigned int total = 0, sec_num; 2898 block_t blk_end = 0; 2899 bool fragmented = false; 2900 int err; 2901 2902 f2fs_balance_fs(sbi, true); 2903 2904 inode_lock(inode); 2905 pg_start = range->start >> PAGE_SHIFT; 2906 pg_end = min_t(pgoff_t, 2907 (range->start + range->len) >> PAGE_SHIFT, 2908 DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE)); 2909 2910 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) || 2911 f2fs_is_atomic_file(inode)) { 2912 err = -EINVAL; 2913 goto unlock_out; 2914 } 2915 2916 /* if in-place-update policy is enabled, don't waste time here */ 2917 set_inode_flag(inode, FI_OPU_WRITE); 2918 if (f2fs_should_update_inplace(inode, NULL)) { 2919 err = -EINVAL; 2920 goto out; 2921 } 2922 2923 /* writeback all dirty pages in the range */ 2924 err = filemap_write_and_wait_range(inode->i_mapping, 2925 pg_start << PAGE_SHIFT, 2926 (pg_end << PAGE_SHIFT) - 1); 2927 if (err) 2928 goto out; 2929 2930 /* 2931 * lookup mapping info in extent cache, skip defragmenting if physical 2932 * block addresses are continuous. 
2933 */ 2934 if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) { 2935 if ((pgoff_t)ei.fofs + ei.len >= pg_end) 2936 goto out; 2937 } 2938 2939 map.m_lblk = pg_start; 2940 map.m_next_pgofs = &next_pgofs; 2941 2942 /* 2943 * lookup mapping info in dnode page cache, skip defragmenting if all 2944 * physical block addresses are continuous even if there are hole(s) 2945 * in logical blocks. 2946 */ 2947 while (map.m_lblk < pg_end) { 2948 map.m_len = pg_end - map.m_lblk; 2949 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT); 2950 if (err) 2951 goto out; 2952 2953 if (!(map.m_flags & F2FS_MAP_FLAGS)) { 2954 map.m_lblk = next_pgofs; 2955 continue; 2956 } 2957 2958 if (blk_end && blk_end != map.m_pblk) 2959 fragmented = true; 2960 2961 /* record total count of block that we're going to move */ 2962 total += map.m_len; 2963 2964 blk_end = map.m_pblk + map.m_len; 2965 2966 map.m_lblk += map.m_len; 2967 } 2968 2969 if (!fragmented) { 2970 total = 0; 2971 goto out; 2972 } 2973 2974 sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi)); 2975 2976 /* 2977 * make sure there are enough free section for LFS allocation, this can 2978 * avoid defragment running in SSR mode when free section are allocated 2979 * intensively 2980 */ 2981 if (has_not_enough_free_secs(sbi, 0, sec_num)) { 2982 err = -EAGAIN; 2983 goto out; 2984 } 2985 2986 map.m_lblk = pg_start; 2987 map.m_len = pg_end - pg_start; 2988 total = 0; 2989 2990 while (map.m_lblk < pg_end) { 2991 pgoff_t idx; 2992 int cnt = 0; 2993 2994 do_map: 2995 map.m_len = pg_end - map.m_lblk; 2996 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT); 2997 if (err) 2998 goto clear_out; 2999 3000 if (!(map.m_flags & F2FS_MAP_FLAGS)) { 3001 map.m_lblk = next_pgofs; 3002 goto check; 3003 } 3004 3005 set_inode_flag(inode, FI_SKIP_WRITES); 3006 3007 idx = map.m_lblk; 3008 while (idx < map.m_lblk + map.m_len && 3009 cnt < BLKS_PER_SEG(sbi)) { 3010 struct folio *folio; 3011 3012 folio = f2fs_get_lock_data_folio(inode, idx, true); 3013 if (IS_ERR(folio)) { 3014 err = PTR_ERR(folio); 3015 goto clear_out; 3016 } 3017 3018 f2fs_folio_wait_writeback(folio, DATA, true, true); 3019 3020 folio_mark_dirty(folio); 3021 folio_set_f2fs_gcing(folio); 3022 f2fs_folio_put(folio, true); 3023 3024 idx++; 3025 cnt++; 3026 total++; 3027 } 3028 3029 map.m_lblk = idx; 3030 check: 3031 if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi)) 3032 goto do_map; 3033 3034 clear_inode_flag(inode, FI_SKIP_WRITES); 3035 3036 err = filemap_fdatawrite(inode->i_mapping); 3037 if (err) 3038 goto out; 3039 } 3040 clear_out: 3041 clear_inode_flag(inode, FI_SKIP_WRITES); 3042 out: 3043 clear_inode_flag(inode, FI_OPU_WRITE); 3044 unlock_out: 3045 inode_unlock(inode); 3046 if (!err) 3047 range->len = (u64)total << PAGE_SHIFT; 3048 return err; 3049 } 3050 3051 static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) 3052 { 3053 struct inode *inode = file_inode(filp); 3054 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3055 struct f2fs_defragment range; 3056 int err; 3057 3058 if (!capable(CAP_SYS_ADMIN)) 3059 return -EPERM; 3060 3061 if (!S_ISREG(inode->i_mode)) 3062 return -EINVAL; 3063 3064 if (f2fs_readonly(sbi->sb)) 3065 return -EROFS; 3066 3067 if (copy_from_user(&range, (struct f2fs_defragment __user *)arg, 3068 sizeof(range))) 3069 return -EFAULT; 3070 3071 /* verify alignment of offset & size */ 3072 if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1)) 3073 return -EINVAL; 3074 3075 if (unlikely((range.start + range.len) >> PAGE_SHIFT > 3076 
max_file_blocks(inode))) 3077 return -EINVAL; 3078 3079 err = mnt_want_write_file(filp); 3080 if (err) 3081 return err; 3082 3083 err = f2fs_defragment_range(sbi, filp, &range); 3084 mnt_drop_write_file(filp); 3085 3086 if (range.len) 3087 f2fs_update_time(sbi, REQ_TIME); 3088 if (err < 0) 3089 return err; 3090 3091 if (copy_to_user((struct f2fs_defragment __user *)arg, &range, 3092 sizeof(range))) 3093 return -EFAULT; 3094 3095 return 0; 3096 } 3097 3098 static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, 3099 struct file *file_out, loff_t pos_out, size_t len) 3100 { 3101 struct inode *src = file_inode(file_in); 3102 struct inode *dst = file_inode(file_out); 3103 struct f2fs_sb_info *sbi = F2FS_I_SB(src); 3104 struct f2fs_lock_context lc; 3105 size_t olen = len, dst_max_i_size = 0; 3106 size_t dst_osize; 3107 int ret; 3108 3109 if (file_in->f_path.mnt != file_out->f_path.mnt || 3110 src->i_sb != dst->i_sb) 3111 return -EXDEV; 3112 3113 if (unlikely(f2fs_readonly(src->i_sb))) 3114 return -EROFS; 3115 3116 if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode)) 3117 return -EINVAL; 3118 3119 if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst)) 3120 return -EOPNOTSUPP; 3121 3122 if (pos_out < 0 || pos_in < 0) 3123 return -EINVAL; 3124 3125 if (src == dst) { 3126 if (pos_in == pos_out) 3127 return 0; 3128 if (pos_out > pos_in && pos_out < pos_in + len) 3129 return -EINVAL; 3130 } 3131 3132 inode_lock(src); 3133 if (src != dst) { 3134 ret = -EBUSY; 3135 if (!inode_trylock(dst)) 3136 goto out; 3137 } 3138 3139 if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) || 3140 f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) { 3141 ret = -EOPNOTSUPP; 3142 goto out_unlock; 3143 } 3144 3145 if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) { 3146 ret = -EINVAL; 3147 goto out_unlock; 3148 } 3149 3150 ret = -EINVAL; 3151 if (pos_in + len > src->i_size || pos_in + len < pos_in) 3152 goto out_unlock; 3153 if (len == 0) 3154 olen = len = src->i_size - pos_in; 3155 if (pos_in + len == src->i_size) 3156 len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in; 3157 if (len == 0) { 3158 ret = 0; 3159 goto out_unlock; 3160 } 3161 3162 dst_osize = dst->i_size; 3163 if (pos_out + olen > dst->i_size) 3164 dst_max_i_size = pos_out + olen; 3165 3166 /* verify the end result is block aligned */ 3167 if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) || 3168 !IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) || 3169 !IS_ALIGNED(pos_out, F2FS_BLKSIZE)) 3170 goto out_unlock; 3171 3172 ret = f2fs_convert_inline_inode(src); 3173 if (ret) 3174 goto out_unlock; 3175 3176 ret = f2fs_convert_inline_inode(dst); 3177 if (ret) 3178 goto out_unlock; 3179 3180 /* write out all dirty pages from offset */ 3181 ret = filemap_write_and_wait_range(src->i_mapping, 3182 pos_in, pos_in + len); 3183 if (ret) 3184 goto out_unlock; 3185 3186 ret = filemap_write_and_wait_range(dst->i_mapping, 3187 pos_out, pos_out + len); 3188 if (ret) 3189 goto out_unlock; 3190 3191 f2fs_balance_fs(sbi, true); 3192 3193 f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); 3194 if (src != dst) { 3195 ret = -EBUSY; 3196 if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) 3197 goto out_src; 3198 } 3199 3200 f2fs_lock_op(sbi, &lc); 3201 ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in), 3202 F2FS_BYTES_TO_BLK(pos_out), 3203 F2FS_BYTES_TO_BLK(len), false); 3204 3205 if (!ret) { 3206 if (dst_max_i_size) 3207 f2fs_i_size_write(dst, dst_max_i_size); 3208 else if (dst_osize != dst->i_size) 3209 f2fs_i_size_write(dst, dst_osize); 3210 } 3211 
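/* block exchange and i_size update are done; release f2fs_lock_op() before dropping the i_gc_rwsem writers taken above */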
f2fs_unlock_op(sbi, &lc); 3212 3213 if (src != dst) 3214 f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); 3215 out_src: 3216 f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); 3217 if (ret) 3218 goto out_unlock; 3219 3220 inode_set_mtime_to_ts(src, inode_set_ctime_current(src)); 3221 f2fs_mark_inode_dirty_sync(src, false); 3222 if (src != dst) { 3223 inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst)); 3224 f2fs_mark_inode_dirty_sync(dst, false); 3225 } 3226 f2fs_update_time(sbi, REQ_TIME); 3227 3228 out_unlock: 3229 if (src != dst) 3230 inode_unlock(dst); 3231 out: 3232 inode_unlock(src); 3233 return ret; 3234 } 3235 3236 static int __f2fs_ioc_move_range(struct file *filp, 3237 struct f2fs_move_range *range) 3238 { 3239 int err; 3240 3241 if (!(filp->f_mode & FMODE_READ) || 3242 !(filp->f_mode & FMODE_WRITE)) 3243 return -EBADF; 3244 3245 CLASS(fd, dst)(range->dst_fd); 3246 if (fd_empty(dst)) 3247 return -EBADF; 3248 3249 if (!(fd_file(dst)->f_mode & FMODE_WRITE)) 3250 return -EBADF; 3251 3252 err = mnt_want_write_file(filp); 3253 if (err) 3254 return err; 3255 3256 err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst), 3257 range->pos_out, range->len); 3258 3259 mnt_drop_write_file(filp); 3260 return err; 3261 } 3262 3263 static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) 3264 { 3265 struct f2fs_move_range range; 3266 3267 if (copy_from_user(&range, (struct f2fs_move_range __user *)arg, 3268 sizeof(range))) 3269 return -EFAULT; 3270 return __f2fs_ioc_move_range(filp, &range); 3271 } 3272 3273 static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) 3274 { 3275 struct inode *inode = file_inode(filp); 3276 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3277 struct sit_info *sm = SIT_I(sbi); 3278 unsigned int start_segno = 0, end_segno = 0; 3279 unsigned int dev_start_segno = 0, dev_end_segno = 0; 3280 struct f2fs_flush_device range; 3281 struct f2fs_gc_control gc_control = { 3282 .init_gc_type = FG_GC, 3283 .should_migrate_blocks = true, 3284 .err_gc_skipped = true, 3285 .nr_free_secs = 0 }; 3286 int ret; 3287 3288 if (!capable(CAP_SYS_ADMIN)) 3289 return -EPERM; 3290 3291 if (f2fs_readonly(sbi->sb)) 3292 return -EROFS; 3293 3294 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) 3295 return -EINVAL; 3296 3297 if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, 3298 sizeof(range))) 3299 return -EFAULT; 3300 3301 if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || 3302 __is_large_section(sbi)) { 3303 f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1", 3304 range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi)); 3305 return -EINVAL; 3306 } 3307 3308 ret = mnt_want_write_file(filp); 3309 if (ret) 3310 return ret; 3311 3312 if (range.dev_num != 0) 3313 dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk); 3314 dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk); 3315 3316 start_segno = sm->last_victim[FLUSH_DEVICE]; 3317 if (start_segno < dev_start_segno || start_segno >= dev_end_segno) 3318 start_segno = dev_start_segno; 3319 end_segno = min(start_segno + range.segments, dev_end_segno); 3320 3321 while (start_segno < end_segno) { 3322 if (!f2fs_down_write_trylock_trace(&sbi->gc_lock, &gc_control.lc)) { 3323 ret = -EBUSY; 3324 goto out; 3325 } 3326 sm->last_victim[GC_CB] = end_segno + 1; 3327 sm->last_victim[GC_GREEDY] = end_segno + 1; 3328 sm->last_victim[ALLOC_NEXT] = end_segno + 1; 3329 3330 gc_control.victim_segno = start_segno; 3331 stat_inc_gc_call_count(sbi, FOREGROUND); 3332 ret = 
f2fs_gc(sbi, &gc_control); 3333 if (ret == -EAGAIN) 3334 ret = 0; 3335 else if (ret < 0) 3336 break; 3337 start_segno++; 3338 } 3339 out: 3340 mnt_drop_write_file(filp); 3341 return ret; 3342 } 3343 3344 static int f2fs_ioc_get_features(struct file *filp, unsigned long arg) 3345 { 3346 struct inode *inode = file_inode(filp); 3347 u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature); 3348 3349 /* Must validate to set it with SQLite behavior in Android. */ 3350 sb_feature |= F2FS_FEATURE_ATOMIC_WRITE; 3351 3352 return put_user(sb_feature, (u32 __user *)arg); 3353 } 3354 3355 #ifdef CONFIG_QUOTA 3356 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) 3357 { 3358 struct dquot *transfer_to[MAXQUOTAS] = {}; 3359 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3360 struct super_block *sb = sbi->sb; 3361 int err; 3362 3363 transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); 3364 if (IS_ERR(transfer_to[PRJQUOTA])) 3365 return PTR_ERR(transfer_to[PRJQUOTA]); 3366 3367 err = __dquot_transfer(inode, transfer_to); 3368 if (err) 3369 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); 3370 dqput(transfer_to[PRJQUOTA]); 3371 return err; 3372 } 3373 3374 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) 3375 { 3376 struct f2fs_inode_info *fi = F2FS_I(inode); 3377 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3378 struct f2fs_inode *ri = NULL; 3379 struct f2fs_lock_context lc; 3380 kprojid_t kprojid; 3381 int err; 3382 3383 if (!f2fs_sb_has_project_quota(sbi)) { 3384 if (projid != F2FS_DEF_PROJID) 3385 return -EOPNOTSUPP; 3386 else 3387 return 0; 3388 } 3389 3390 if (!f2fs_has_extra_attr(inode)) 3391 return -EOPNOTSUPP; 3392 3393 kprojid = make_kprojid(&init_user_ns, (projid_t)projid); 3394 3395 if (projid_eq(kprojid, fi->i_projid)) 3396 return 0; 3397 3398 err = -EPERM; 3399 /* Is it quota file? 
Do not allow user to mess with it */ 3400 if (IS_NOQUOTA(inode)) 3401 return err; 3402 3403 if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) 3404 return -EOVERFLOW; 3405 3406 err = f2fs_dquot_initialize(inode); 3407 if (err) 3408 return err; 3409 3410 f2fs_lock_op(sbi, &lc); 3411 err = f2fs_transfer_project_quota(inode, kprojid); 3412 if (err) 3413 goto out_unlock; 3414 3415 fi->i_projid = kprojid; 3416 inode_set_ctime_current(inode); 3417 f2fs_mark_inode_dirty_sync(inode, true); 3418 out_unlock: 3419 f2fs_unlock_op(sbi, &lc); 3420 return err; 3421 } 3422 #else 3423 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) 3424 { 3425 return 0; 3426 } 3427 3428 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) 3429 { 3430 if (projid != F2FS_DEF_PROJID) 3431 return -EOPNOTSUPP; 3432 return 0; 3433 } 3434 #endif 3435 3436 int f2fs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) 3437 { 3438 struct inode *inode = d_inode(dentry); 3439 struct f2fs_inode_info *fi = F2FS_I(inode); 3440 u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags); 3441 3442 if (IS_ENCRYPTED(inode)) 3443 fsflags |= FS_ENCRYPT_FL; 3444 if (IS_VERITY(inode)) 3445 fsflags |= FS_VERITY_FL; 3446 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) 3447 fsflags |= FS_INLINE_DATA_FL; 3448 if (is_inode_flag_set(inode, FI_PIN_FILE)) 3449 fsflags |= FS_NOCOW_FL; 3450 3451 fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL); 3452 3453 if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) 3454 fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid); 3455 3456 return 0; 3457 } 3458 3459 int f2fs_fileattr_set(struct mnt_idmap *idmap, 3460 struct dentry *dentry, struct file_kattr *fa) 3461 { 3462 struct inode *inode = d_inode(dentry); 3463 u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL; 3464 u32 iflags; 3465 int err; 3466 3467 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 3468 return -EIO; 3469 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) 3470 return -ENOSPC; 3471 if (fsflags & ~F2FS_GETTABLE_FS_FL) 3472 return -EOPNOTSUPP; 3473 fsflags &= F2FS_SETTABLE_FS_FL; 3474 if (!fa->flags_valid) 3475 mask &= FS_COMMON_FL; 3476 3477 iflags = f2fs_fsflags_to_iflags(fsflags); 3478 if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) 3479 return -EOPNOTSUPP; 3480 3481 err = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(mask)); 3482 if (!err) 3483 err = f2fs_ioc_setproject(inode, fa->fsx_projid); 3484 3485 return err; 3486 } 3487 3488 int f2fs_pin_file_control(struct inode *inode, bool inc) 3489 { 3490 struct f2fs_inode_info *fi = F2FS_I(inode); 3491 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3492 3493 if (IS_DEVICE_ALIASING(inode)) 3494 return -EINVAL; 3495 3496 if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { 3497 f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", 3498 __func__, inode->i_ino, fi->i_gc_failures); 3499 clear_inode_flag(inode, FI_PIN_FILE); 3500 return -EAGAIN; 3501 } 3502 3503 /* Use i_gc_failures for normal file as a risk signal. 
*/ 3504 if (inc) 3505 f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); 3506 3507 return 0; 3508 } 3509 3510 static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) 3511 { 3512 struct inode *inode = file_inode(filp); 3513 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3514 __u32 pin; 3515 int ret = 0; 3516 3517 if (get_user(pin, (__u32 __user *)arg)) 3518 return -EFAULT; 3519 3520 if (!S_ISREG(inode->i_mode)) 3521 return -EINVAL; 3522 3523 if (f2fs_readonly(sbi->sb)) 3524 return -EROFS; 3525 3526 if (!pin && IS_DEVICE_ALIASING(inode)) 3527 return -EOPNOTSUPP; 3528 3529 ret = mnt_want_write_file(filp); 3530 if (ret) 3531 return ret; 3532 3533 inode_lock(inode); 3534 3535 if (f2fs_is_atomic_file(inode)) { 3536 ret = -EINVAL; 3537 goto out; 3538 } 3539 3540 if (!pin) { 3541 clear_inode_flag(inode, FI_PIN_FILE); 3542 f2fs_i_gc_failures_write(inode, 0); 3543 goto done; 3544 } else if (f2fs_is_pinned_file(inode)) { 3545 goto done; 3546 } 3547 3548 if (F2FS_HAS_BLOCKS(inode)) { 3549 ret = -EFBIG; 3550 goto out; 3551 } 3552 3553 /* Let's allow file pinning on zoned device. */ 3554 if (!f2fs_sb_has_blkzoned(sbi) && 3555 f2fs_should_update_outplace(inode, NULL)) { 3556 ret = -EINVAL; 3557 goto out; 3558 } 3559 3560 if (f2fs_pin_file_control(inode, false)) { 3561 ret = -EAGAIN; 3562 goto out; 3563 } 3564 3565 ret = f2fs_convert_inline_inode(inode); 3566 if (ret) 3567 goto out; 3568 3569 if (!f2fs_disable_compressed_file(inode)) { 3570 ret = -EOPNOTSUPP; 3571 goto out; 3572 } 3573 3574 set_inode_flag(inode, FI_PIN_FILE); 3575 ret = F2FS_I(inode)->i_gc_failures; 3576 done: 3577 f2fs_update_time(sbi, REQ_TIME); 3578 out: 3579 inode_unlock(inode); 3580 mnt_drop_write_file(filp); 3581 return ret; 3582 } 3583 3584 static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) 3585 { 3586 struct inode *inode = file_inode(filp); 3587 __u32 pin = 0; 3588 3589 if (is_inode_flag_set(inode, FI_PIN_FILE)) 3590 pin = F2FS_I(inode)->i_gc_failures; 3591 return put_user(pin, (u32 __user *)arg); 3592 } 3593 3594 static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg) 3595 { 3596 return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 
1 : 0, 3597 (u32 __user *)arg); 3598 } 3599 3600 static int f2fs_ioc_io_prio(struct file *filp, unsigned long arg) 3601 { 3602 struct inode *inode = file_inode(filp); 3603 __u32 level; 3604 3605 if (get_user(level, (__u32 __user *)arg)) 3606 return -EFAULT; 3607 3608 if (!S_ISREG(inode->i_mode) || level >= F2FS_IOPRIO_MAX) 3609 return -EINVAL; 3610 3611 inode_lock(inode); 3612 F2FS_I(inode)->ioprio_hint = level; 3613 inode_unlock(inode); 3614 return 0; 3615 } 3616 3617 int f2fs_precache_extents(struct inode *inode) 3618 { 3619 struct f2fs_inode_info *fi = F2FS_I(inode); 3620 struct f2fs_map_blocks map; 3621 pgoff_t m_next_extent; 3622 loff_t end; 3623 int err; 3624 3625 if (is_inode_flag_set(inode, FI_NO_EXTENT)) 3626 return -EOPNOTSUPP; 3627 3628 map.m_lblk = 0; 3629 map.m_pblk = 0; 3630 map.m_next_pgofs = NULL; 3631 map.m_next_extent = &m_next_extent; 3632 map.m_seg_type = NO_CHECK_TYPE; 3633 map.m_may_create = false; 3634 end = F2FS_BLK_ALIGN(i_size_read(inode)); 3635 3636 while (map.m_lblk < end) { 3637 map.m_len = end - map.m_lblk; 3638 3639 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 3640 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE); 3641 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 3642 if (err || !map.m_len) 3643 return err; 3644 3645 map.m_lblk = m_next_extent; 3646 } 3647 3648 return 0; 3649 } 3650 3651 static int f2fs_ioc_precache_extents(struct file *filp) 3652 { 3653 return f2fs_precache_extents(file_inode(filp)); 3654 } 3655 3656 static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) 3657 { 3658 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); 3659 __u64 block_count; 3660 3661 if (!capable(CAP_SYS_ADMIN)) 3662 return -EPERM; 3663 3664 if (f2fs_readonly(sbi->sb)) 3665 return -EROFS; 3666 3667 if (copy_from_user(&block_count, (void __user *)arg, 3668 sizeof(block_count))) 3669 return -EFAULT; 3670 3671 return f2fs_resize_fs(filp, block_count); 3672 } 3673 3674 static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg) 3675 { 3676 struct inode *inode = file_inode(filp); 3677 3678 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 3679 3680 if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) { 3681 f2fs_warn(F2FS_I_SB(inode), 3682 "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem", 3683 inode->i_ino); 3684 return -EOPNOTSUPP; 3685 } 3686 3687 return fsverity_ioctl_enable(filp, (const void __user *)arg); 3688 } 3689 3690 static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg) 3691 { 3692 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) 3693 return -EOPNOTSUPP; 3694 3695 return fsverity_ioctl_measure(filp, (void __user *)arg); 3696 } 3697 3698 static int f2fs_ioc_read_verity_metadata(struct file *filp, unsigned long arg) 3699 { 3700 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) 3701 return -EOPNOTSUPP; 3702 3703 return fsverity_ioctl_read_metadata(filp, (const void __user *)arg); 3704 } 3705 3706 static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg) 3707 { 3708 struct inode *inode = file_inode(filp); 3709 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3710 char *vbuf; 3711 int count; 3712 int err = 0; 3713 3714 vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL); 3715 if (!vbuf) 3716 return -ENOMEM; 3717 3718 f2fs_down_read(&sbi->sb_lock); 3719 count = utf16s_to_utf8s(sbi->raw_super->volume_name, 3720 ARRAY_SIZE(sbi->raw_super->volume_name), 3721 UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME); 3722 f2fs_up_read(&sbi->sb_lock); 3723 3724 if (copy_to_user((char __user *)arg, 
vbuf, 3725 min(FSLABEL_MAX, count))) 3726 err = -EFAULT; 3727 3728 kfree(vbuf); 3729 return err; 3730 } 3731 3732 static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) 3733 { 3734 struct inode *inode = file_inode(filp); 3735 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3736 char *vbuf; 3737 int err = 0; 3738 3739 if (!capable(CAP_SYS_ADMIN)) 3740 return -EPERM; 3741 3742 vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX); 3743 if (IS_ERR(vbuf)) 3744 return PTR_ERR(vbuf); 3745 3746 err = mnt_want_write_file(filp); 3747 if (err) 3748 goto out; 3749 3750 f2fs_down_write(&sbi->sb_lock); 3751 3752 memset(sbi->raw_super->volume_name, 0, 3753 sizeof(sbi->raw_super->volume_name)); 3754 utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN, 3755 sbi->raw_super->volume_name, 3756 ARRAY_SIZE(sbi->raw_super->volume_name)); 3757 3758 err = f2fs_commit_super(sbi, false); 3759 3760 f2fs_up_write(&sbi->sb_lock); 3761 3762 mnt_drop_write_file(filp); 3763 out: 3764 kfree(vbuf); 3765 return err; 3766 } 3767 3768 static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks) 3769 { 3770 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 3771 return -EOPNOTSUPP; 3772 3773 if (!f2fs_compressed_file(inode)) 3774 return -EINVAL; 3775 3776 *blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks); 3777 3778 return 0; 3779 } 3780 3781 static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg) 3782 { 3783 struct inode *inode = file_inode(filp); 3784 __u64 blocks; 3785 int ret; 3786 3787 ret = f2fs_get_compress_blocks(inode, &blocks); 3788 if (ret < 0) 3789 return ret; 3790 3791 return put_user(blocks, (u64 __user *)arg); 3792 } 3793 3794 static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) 3795 { 3796 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 3797 unsigned int released_blocks = 0; 3798 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 3799 block_t blkaddr; 3800 int i; 3801 3802 for (i = 0; i < count; i++) { 3803 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3804 dn->ofs_in_node + i); 3805 3806 if (!__is_valid_data_blkaddr(blkaddr)) 3807 continue; 3808 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, 3809 DATA_GENERIC_ENHANCE))) 3810 return -EFSCORRUPTED; 3811 } 3812 3813 while (count) { 3814 int compr_blocks = 0; 3815 3816 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { 3817 blkaddr = f2fs_data_blkaddr(dn); 3818 3819 if (i == 0) { 3820 if (blkaddr == COMPRESS_ADDR) 3821 continue; 3822 dn->ofs_in_node += cluster_size; 3823 goto next; 3824 } 3825 3826 if (__is_valid_data_blkaddr(blkaddr)) 3827 compr_blocks++; 3828 3829 if (blkaddr != NEW_ADDR) 3830 continue; 3831 3832 f2fs_set_data_blkaddr(dn, NULL_ADDR); 3833 } 3834 3835 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false); 3836 dec_valid_block_count(sbi, dn->inode, 3837 cluster_size - compr_blocks); 3838 3839 released_blocks += cluster_size - compr_blocks; 3840 next: 3841 count -= cluster_size; 3842 } 3843 3844 return released_blocks; 3845 } 3846 3847 static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) 3848 { 3849 struct inode *inode = file_inode(filp); 3850 struct f2fs_inode_info *fi = F2FS_I(inode); 3851 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3852 struct f2fs_lock_context lc; 3853 pgoff_t page_idx = 0, last_idx; 3854 unsigned int released_blocks = 0; 3855 int ret; 3856 int writecount; 3857 3858 if (!f2fs_sb_has_compression(sbi)) 3859 return -EOPNOTSUPP; 3860 3861 if (f2fs_readonly(sbi->sb)) 3862 return -EROFS; 3863 3864 ret = 
mnt_want_write_file(filp); 3865 if (ret) 3866 return ret; 3867 3868 f2fs_balance_fs(sbi, true); 3869 3870 inode_lock(inode); 3871 3872 writecount = atomic_read(&inode->i_writecount); 3873 if ((filp->f_mode & FMODE_WRITE && writecount != 1) || 3874 (!(filp->f_mode & FMODE_WRITE) && writecount)) { 3875 ret = -EBUSY; 3876 goto out; 3877 } 3878 3879 if (!f2fs_compressed_file(inode) || 3880 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 3881 ret = -EINVAL; 3882 goto out; 3883 } 3884 3885 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 3886 if (ret) 3887 goto out; 3888 3889 if (!atomic_read(&fi->i_compr_blocks)) { 3890 ret = -EPERM; 3891 goto out; 3892 } 3893 3894 set_inode_flag(inode, FI_COMPRESS_RELEASED); 3895 inode_set_ctime_current(inode); 3896 f2fs_mark_inode_dirty_sync(inode, true); 3897 3898 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 3899 filemap_invalidate_lock(inode->i_mapping); 3900 3901 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 3902 3903 while (page_idx < last_idx) { 3904 struct dnode_of_data dn; 3905 pgoff_t end_offset, count; 3906 3907 f2fs_lock_op(sbi, &lc); 3908 3909 set_new_dnode(&dn, inode, NULL, NULL, 0); 3910 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); 3911 if (ret) { 3912 f2fs_unlock_op(sbi, &lc); 3913 if (ret == -ENOENT) { 3914 page_idx = f2fs_get_next_page_offset(&dn, 3915 page_idx); 3916 ret = 0; 3917 continue; 3918 } 3919 break; 3920 } 3921 3922 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 3923 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); 3924 count = round_up(count, fi->i_cluster_size); 3925 3926 ret = release_compress_blocks(&dn, count); 3927 3928 f2fs_put_dnode(&dn); 3929 3930 f2fs_unlock_op(sbi, &lc); 3931 3932 if (ret < 0) 3933 break; 3934 3935 page_idx += count; 3936 released_blocks += ret; 3937 } 3938 3939 filemap_invalidate_unlock(inode->i_mapping); 3940 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 3941 out: 3942 if (released_blocks) 3943 f2fs_update_time(sbi, REQ_TIME); 3944 inode_unlock(inode); 3945 3946 mnt_drop_write_file(filp); 3947 3948 if (ret >= 0) { 3949 ret = put_user(released_blocks, (u64 __user *)arg); 3950 } else if (released_blocks && 3951 atomic_read(&fi->i_compr_blocks)) { 3952 set_sbi_flag(sbi, SBI_NEED_FSCK); 3953 f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " 3954 "iblocks=%llu, released=%u, compr_blocks=%u, " 3955 "run fsck to fix.", 3956 __func__, inode->i_ino, inode->i_blocks, 3957 released_blocks, 3958 atomic_read(&fi->i_compr_blocks)); 3959 } 3960 3961 return ret; 3962 } 3963 3964 static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, 3965 unsigned int *reserved_blocks) 3966 { 3967 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 3968 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 3969 block_t blkaddr; 3970 int i; 3971 3972 for (i = 0; i < count; i++) { 3973 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3974 dn->ofs_in_node + i); 3975 3976 if (!__is_valid_data_blkaddr(blkaddr)) 3977 continue; 3978 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, 3979 DATA_GENERIC_ENHANCE))) 3980 return -EFSCORRUPTED; 3981 } 3982 3983 while (count) { 3984 int compr_blocks = 0; 3985 blkcnt_t reserved = 0; 3986 blkcnt_t to_reserved; 3987 int ret; 3988 3989 for (i = 0; i < cluster_size; i++) { 3990 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3991 dn->ofs_in_node + i); 3992 3993 if (i == 0) { 3994 if (blkaddr != COMPRESS_ADDR) { 3995 dn->ofs_in_node += cluster_size; 3996 goto next; 3997 } 3998 continue; 3999 } 4000 4001 /* 4002 * compressed 
cluster was not released due to it 4003 * fails in release_compress_blocks(), so NEW_ADDR 4004 * is a possible case. 4005 */ 4006 if (blkaddr == NEW_ADDR) { 4007 reserved++; 4008 continue; 4009 } 4010 if (__is_valid_data_blkaddr(blkaddr)) { 4011 compr_blocks++; 4012 continue; 4013 } 4014 } 4015 4016 to_reserved = cluster_size - compr_blocks - reserved; 4017 4018 /* for the case all blocks in cluster were reserved */ 4019 if (reserved && to_reserved == 1) { 4020 dn->ofs_in_node += cluster_size; 4021 goto next; 4022 } 4023 4024 ret = inc_valid_block_count(sbi, dn->inode, 4025 &to_reserved, false); 4026 if (unlikely(ret)) 4027 return ret; 4028 4029 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { 4030 if (f2fs_data_blkaddr(dn) == NULL_ADDR) 4031 f2fs_set_data_blkaddr(dn, NEW_ADDR); 4032 } 4033 4034 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); 4035 4036 *reserved_blocks += to_reserved; 4037 next: 4038 count -= cluster_size; 4039 } 4040 4041 return 0; 4042 } 4043 4044 static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) 4045 { 4046 struct inode *inode = file_inode(filp); 4047 struct f2fs_inode_info *fi = F2FS_I(inode); 4048 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4049 pgoff_t page_idx = 0, last_idx; 4050 unsigned int reserved_blocks = 0; 4051 int ret; 4052 4053 if (!f2fs_sb_has_compression(sbi)) 4054 return -EOPNOTSUPP; 4055 4056 if (f2fs_readonly(sbi->sb)) 4057 return -EROFS; 4058 4059 ret = mnt_want_write_file(filp); 4060 if (ret) 4061 return ret; 4062 4063 f2fs_balance_fs(sbi, true); 4064 4065 inode_lock(inode); 4066 4067 if (!f2fs_compressed_file(inode) || 4068 !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4069 ret = -EINVAL; 4070 goto unlock_inode; 4071 } 4072 4073 if (atomic_read(&fi->i_compr_blocks)) 4074 goto unlock_inode; 4075 4076 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 4077 filemap_invalidate_lock(inode->i_mapping); 4078 4079 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4080 4081 while (page_idx < last_idx) { 4082 struct dnode_of_data dn; 4083 struct f2fs_lock_context lc; 4084 pgoff_t end_offset, count; 4085 4086 f2fs_lock_op(sbi, &lc); 4087 4088 set_new_dnode(&dn, inode, NULL, NULL, 0); 4089 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); 4090 if (ret) { 4091 f2fs_unlock_op(sbi, &lc); 4092 if (ret == -ENOENT) { 4093 page_idx = f2fs_get_next_page_offset(&dn, 4094 page_idx); 4095 ret = 0; 4096 continue; 4097 } 4098 break; 4099 } 4100 4101 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 4102 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); 4103 count = round_up(count, fi->i_cluster_size); 4104 4105 ret = reserve_compress_blocks(&dn, count, &reserved_blocks); 4106 4107 f2fs_put_dnode(&dn); 4108 4109 f2fs_unlock_op(sbi, &lc); 4110 4111 if (ret < 0) 4112 break; 4113 4114 page_idx += count; 4115 } 4116 4117 filemap_invalidate_unlock(inode->i_mapping); 4118 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 4119 4120 if (!ret) { 4121 clear_inode_flag(inode, FI_COMPRESS_RELEASED); 4122 inode_set_ctime_current(inode); 4123 f2fs_mark_inode_dirty_sync(inode, true); 4124 } 4125 unlock_inode: 4126 if (reserved_blocks) 4127 f2fs_update_time(sbi, REQ_TIME); 4128 inode_unlock(inode); 4129 mnt_drop_write_file(filp); 4130 4131 if (!ret) { 4132 ret = put_user(reserved_blocks, (u64 __user *)arg); 4133 } else if (reserved_blocks && 4134 atomic_read(&fi->i_compr_blocks)) { 4135 set_sbi_flag(sbi, SBI_NEED_FSCK); 4136 f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx " 4137 "iblocks=%llu, reserved=%u, 
compr_blocks=%u, " 4138 "run fsck to fix.", 4139 __func__, inode->i_ino, inode->i_blocks, 4140 reserved_blocks, 4141 atomic_read(&fi->i_compr_blocks)); 4142 } 4143 4144 return ret; 4145 } 4146 4147 static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode, 4148 pgoff_t off, block_t block, block_t len, u32 flags) 4149 { 4150 sector_t sector = SECTOR_FROM_BLOCK(block); 4151 sector_t nr_sects = SECTOR_FROM_BLOCK(len); 4152 int ret = 0; 4153 4154 if (flags & F2FS_TRIM_FILE_DISCARD) { 4155 if (bdev_max_secure_erase_sectors(bdev)) 4156 ret = blkdev_issue_secure_erase(bdev, sector, nr_sects, 4157 GFP_NOFS); 4158 else 4159 ret = blkdev_issue_discard(bdev, sector, nr_sects, 4160 GFP_NOFS); 4161 } 4162 4163 if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) { 4164 if (IS_ENCRYPTED(inode)) 4165 ret = fscrypt_zeroout_range(inode, off, block, len); 4166 else 4167 ret = blkdev_issue_zeroout(bdev, sector, nr_sects, 4168 GFP_NOFS, 0); 4169 } 4170 4171 return ret; 4172 } 4173 4174 static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) 4175 { 4176 struct inode *inode = file_inode(filp); 4177 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4178 struct address_space *mapping = inode->i_mapping; 4179 struct block_device *prev_bdev = NULL; 4180 struct f2fs_sectrim_range range; 4181 pgoff_t index, pg_end, prev_index = 0; 4182 block_t prev_block = 0, len = 0; 4183 loff_t end_addr; 4184 bool to_end = false; 4185 int ret = 0; 4186 4187 if (!(filp->f_mode & FMODE_WRITE)) 4188 return -EBADF; 4189 4190 if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg, 4191 sizeof(range))) 4192 return -EFAULT; 4193 4194 if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) || 4195 !S_ISREG(inode->i_mode)) 4196 return -EINVAL; 4197 4198 if (((range.flags & F2FS_TRIM_FILE_DISCARD) && 4199 !f2fs_hw_support_discard(sbi)) || 4200 ((range.flags & F2FS_TRIM_FILE_ZEROOUT) && 4201 IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi))) 4202 return -EOPNOTSUPP; 4203 4204 ret = mnt_want_write_file(filp); 4205 if (ret) 4206 return ret; 4207 inode_lock(inode); 4208 4209 if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) || 4210 range.start >= inode->i_size) { 4211 ret = -EINVAL; 4212 goto err; 4213 } 4214 4215 if (range.len == 0) 4216 goto err; 4217 4218 if (inode->i_size - range.start > range.len) { 4219 end_addr = range.start + range.len; 4220 } else { 4221 end_addr = range.len == (u64)-1 ? 4222 sbi->sb->s_maxbytes : inode->i_size; 4223 to_end = true; 4224 } 4225 4226 if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) || 4227 (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) { 4228 ret = -EINVAL; 4229 goto err; 4230 } 4231 4232 index = F2FS_BYTES_TO_BLK(range.start); 4233 pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE); 4234 4235 ret = f2fs_convert_inline_inode(inode); 4236 if (ret) 4237 goto err; 4238 4239 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 4240 filemap_invalidate_lock(mapping); 4241 4242 ret = filemap_write_and_wait_range(mapping, range.start, 4243 to_end ? LLONG_MAX : end_addr - 1); 4244 if (ret) 4245 goto out; 4246 4247 truncate_inode_pages_range(mapping, range.start, 4248 to_end ? 
-1 : end_addr - 1); 4249 4250 while (index < pg_end) { 4251 struct dnode_of_data dn; 4252 pgoff_t end_offset, count; 4253 int i; 4254 4255 set_new_dnode(&dn, inode, NULL, NULL, 0); 4256 ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); 4257 if (ret) { 4258 if (ret == -ENOENT) { 4259 index = f2fs_get_next_page_offset(&dn, index); 4260 continue; 4261 } 4262 goto out; 4263 } 4264 4265 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 4266 count = min(end_offset - dn.ofs_in_node, pg_end - index); 4267 for (i = 0; i < count; i++, index++, dn.ofs_in_node++) { 4268 struct block_device *cur_bdev; 4269 block_t blkaddr = f2fs_data_blkaddr(&dn); 4270 4271 if (!__is_valid_data_blkaddr(blkaddr)) 4272 continue; 4273 4274 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, 4275 DATA_GENERIC_ENHANCE)) { 4276 ret = -EFSCORRUPTED; 4277 f2fs_put_dnode(&dn); 4278 goto out; 4279 } 4280 4281 cur_bdev = f2fs_target_device(sbi, blkaddr, NULL); 4282 if (f2fs_is_multi_device(sbi)) { 4283 int di = f2fs_target_device_index(sbi, blkaddr); 4284 4285 blkaddr -= FDEV(di).start_blk; 4286 } 4287 4288 if (len) { 4289 if (prev_bdev == cur_bdev && 4290 index == prev_index + len && 4291 blkaddr == prev_block + len) { 4292 len++; 4293 } else { 4294 ret = f2fs_secure_erase(prev_bdev, 4295 inode, prev_index, prev_block, 4296 len, range.flags); 4297 if (ret) { 4298 f2fs_put_dnode(&dn); 4299 goto out; 4300 } 4301 4302 len = 0; 4303 } 4304 } 4305 4306 if (!len) { 4307 prev_bdev = cur_bdev; 4308 prev_index = index; 4309 prev_block = blkaddr; 4310 len = 1; 4311 } 4312 } 4313 4314 f2fs_put_dnode(&dn); 4315 4316 if (fatal_signal_pending(current)) { 4317 ret = -EINTR; 4318 goto out; 4319 } 4320 cond_resched(); 4321 } 4322 4323 if (len) 4324 ret = f2fs_secure_erase(prev_bdev, inode, prev_index, 4325 prev_block, len, range.flags); 4326 f2fs_update_time(sbi, REQ_TIME); 4327 out: 4328 filemap_invalidate_unlock(mapping); 4329 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 4330 err: 4331 inode_unlock(inode); 4332 mnt_drop_write_file(filp); 4333 4334 return ret; 4335 } 4336 4337 static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg) 4338 { 4339 struct inode *inode = file_inode(filp); 4340 struct f2fs_comp_option option; 4341 4342 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 4343 return -EOPNOTSUPP; 4344 4345 inode_lock_shared(inode); 4346 4347 if (!f2fs_compressed_file(inode)) { 4348 inode_unlock_shared(inode); 4349 return -ENODATA; 4350 } 4351 4352 option.algorithm = F2FS_I(inode)->i_compress_algorithm; 4353 option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size; 4354 4355 inode_unlock_shared(inode); 4356 4357 if (copy_to_user((struct f2fs_comp_option __user *)arg, &option, 4358 sizeof(option))) 4359 return -EFAULT; 4360 4361 return 0; 4362 } 4363 4364 static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) 4365 { 4366 struct inode *inode = file_inode(filp); 4367 struct f2fs_inode_info *fi = F2FS_I(inode); 4368 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4369 struct f2fs_comp_option option; 4370 int ret = 0; 4371 4372 if (!f2fs_sb_has_compression(sbi)) 4373 return -EOPNOTSUPP; 4374 4375 if (!(filp->f_mode & FMODE_WRITE)) 4376 return -EBADF; 4377 4378 if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg, 4379 sizeof(option))) 4380 return -EFAULT; 4381 4382 if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || 4383 option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || 4384 option.algorithm >= COMPRESS_MAX) 4385 return -EINVAL; 4386 4387 ret = mnt_want_write_file(filp); 4388 if (ret) 
4389 return ret; 4390 inode_lock(inode); 4391 4392 f2fs_down_write(&F2FS_I(inode)->i_sem); 4393 if (!f2fs_compressed_file(inode)) { 4394 ret = -EINVAL; 4395 goto out; 4396 } 4397 4398 if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { 4399 ret = -EBUSY; 4400 goto out; 4401 } 4402 4403 if (F2FS_HAS_BLOCKS(inode)) { 4404 ret = -EFBIG; 4405 goto out; 4406 } 4407 4408 fi->i_compress_algorithm = option.algorithm; 4409 fi->i_log_cluster_size = option.log_cluster_size; 4410 fi->i_cluster_size = BIT(option.log_cluster_size); 4411 /* Set default level */ 4412 if (fi->i_compress_algorithm == COMPRESS_ZSTD) 4413 fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; 4414 else 4415 fi->i_compress_level = 0; 4416 /* Adjust mount option level */ 4417 if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm && 4418 F2FS_OPTION(sbi).compress_level) 4419 fi->i_compress_level = F2FS_OPTION(sbi).compress_level; 4420 f2fs_mark_inode_dirty_sync(inode, true); 4421 4422 if (!f2fs_is_compress_backend_ready(inode)) 4423 f2fs_warn(sbi, "compression algorithm is successfully set, " 4424 "but current kernel doesn't support this algorithm."); 4425 out: 4426 f2fs_up_write(&fi->i_sem); 4427 inode_unlock(inode); 4428 mnt_drop_write_file(filp); 4429 4430 return ret; 4431 } 4432 4433 static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) 4434 { 4435 DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx); 4436 struct address_space *mapping = inode->i_mapping; 4437 struct folio *folio; 4438 pgoff_t redirty_idx = page_idx; 4439 int page_len = 0, ret = 0; 4440 4441 filemap_invalidate_lock_shared(mapping); 4442 page_cache_ra_unbounded(&ractl, len, 0); 4443 filemap_invalidate_unlock_shared(mapping); 4444 4445 do { 4446 folio = read_cache_folio(mapping, page_idx, NULL, NULL); 4447 if (IS_ERR(folio)) { 4448 ret = PTR_ERR(folio); 4449 break; 4450 } 4451 page_len += folio_nr_pages(folio) - (page_idx - folio->index); 4452 page_idx = folio_next_index(folio); 4453 } while (page_len < len); 4454 4455 do { 4456 folio = filemap_lock_folio(mapping, redirty_idx); 4457 4458 /* It will never fail, when folio has pinned above */ 4459 f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(folio)); 4460 4461 f2fs_folio_wait_writeback(folio, DATA, true, true); 4462 4463 folio_mark_dirty(folio); 4464 folio_set_f2fs_gcing(folio); 4465 redirty_idx = folio_next_index(folio); 4466 folio_unlock(folio); 4467 folio_put_refs(folio, 2); 4468 } while (redirty_idx < page_idx); 4469 4470 return ret; 4471 } 4472 4473 static int f2fs_ioc_decompress_file(struct file *filp) 4474 { 4475 struct inode *inode = file_inode(filp); 4476 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4477 struct f2fs_inode_info *fi = F2FS_I(inode); 4478 pgoff_t page_idx = 0, last_idx, cluster_idx; 4479 int ret; 4480 4481 if (!f2fs_sb_has_compression(sbi) || 4482 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) 4483 return -EOPNOTSUPP; 4484 4485 if (!(filp->f_mode & FMODE_WRITE)) 4486 return -EBADF; 4487 4488 f2fs_balance_fs(sbi, true); 4489 4490 ret = mnt_want_write_file(filp); 4491 if (ret) 4492 return ret; 4493 inode_lock(inode); 4494 4495 if (!f2fs_is_compress_backend_ready(inode)) { 4496 ret = -EOPNOTSUPP; 4497 goto out; 4498 } 4499 4500 if (!f2fs_compressed_file(inode) || 4501 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4502 ret = -EINVAL; 4503 goto out; 4504 } 4505 4506 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 4507 if (ret) 4508 goto out; 4509 4510 if (!atomic_read(&fi->i_compr_blocks)) 4511 goto out; 4512 4513 last_idx = 
DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4514 last_idx >>= fi->i_log_cluster_size; 4515 4516 for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { 4517 page_idx = cluster_idx << fi->i_log_cluster_size; 4518 4519 if (!f2fs_is_compressed_cluster(inode, page_idx)) 4520 continue; 4521 4522 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); 4523 if (ret < 0) 4524 break; 4525 4526 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { 4527 ret = filemap_fdatawrite(inode->i_mapping); 4528 if (ret < 0) 4529 break; 4530 } 4531 4532 cond_resched(); 4533 if (fatal_signal_pending(current)) { 4534 ret = -EINTR; 4535 break; 4536 } 4537 } 4538 4539 if (!ret) 4540 ret = filemap_write_and_wait_range(inode->i_mapping, 0, 4541 LLONG_MAX); 4542 4543 if (ret) 4544 f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.", 4545 __func__, ret); 4546 f2fs_update_time(sbi, REQ_TIME); 4547 out: 4548 inode_unlock(inode); 4549 mnt_drop_write_file(filp); 4550 4551 return ret; 4552 } 4553 4554 static int f2fs_ioc_compress_file(struct file *filp) 4555 { 4556 struct inode *inode = file_inode(filp); 4557 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4558 struct f2fs_inode_info *fi = F2FS_I(inode); 4559 pgoff_t page_idx = 0, last_idx, cluster_idx; 4560 int ret; 4561 4562 if (!f2fs_sb_has_compression(sbi) || 4563 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) 4564 return -EOPNOTSUPP; 4565 4566 if (!(filp->f_mode & FMODE_WRITE)) 4567 return -EBADF; 4568 4569 f2fs_balance_fs(sbi, true); 4570 4571 ret = mnt_want_write_file(filp); 4572 if (ret) 4573 return ret; 4574 inode_lock(inode); 4575 4576 if (!f2fs_is_compress_backend_ready(inode)) { 4577 ret = -EOPNOTSUPP; 4578 goto out; 4579 } 4580 4581 if (!f2fs_compressed_file(inode) || 4582 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4583 ret = -EINVAL; 4584 goto out; 4585 } 4586 4587 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 4588 if (ret) 4589 goto out; 4590 4591 set_inode_flag(inode, FI_ENABLE_COMPRESS); 4592 4593 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4594 last_idx >>= fi->i_log_cluster_size; 4595 4596 for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { 4597 page_idx = cluster_idx << fi->i_log_cluster_size; 4598 4599 if (f2fs_is_sparse_cluster(inode, page_idx)) 4600 continue; 4601 4602 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); 4603 if (ret < 0) 4604 break; 4605 4606 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { 4607 ret = filemap_fdatawrite(inode->i_mapping); 4608 if (ret < 0) 4609 break; 4610 } 4611 4612 cond_resched(); 4613 if (fatal_signal_pending(current)) { 4614 ret = -EINTR; 4615 break; 4616 } 4617 } 4618 4619 if (!ret) 4620 ret = filemap_write_and_wait_range(inode->i_mapping, 0, 4621 LLONG_MAX); 4622 4623 clear_inode_flag(inode, FI_ENABLE_COMPRESS); 4624 4625 if (ret) 4626 f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). 
Please delete the file.", 4627 __func__, ret); 4628 f2fs_update_time(sbi, REQ_TIME); 4629 out: 4630 inode_unlock(inode); 4631 mnt_drop_write_file(filp); 4632 4633 return ret; 4634 } 4635 4636 static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 4637 { 4638 switch (cmd) { 4639 case FS_IOC_GETVERSION: 4640 return f2fs_ioc_getversion(filp, arg); 4641 case F2FS_IOC_START_ATOMIC_WRITE: 4642 return f2fs_ioc_start_atomic_write(filp, false); 4643 case F2FS_IOC_START_ATOMIC_REPLACE: 4644 return f2fs_ioc_start_atomic_write(filp, true); 4645 case F2FS_IOC_COMMIT_ATOMIC_WRITE: 4646 return f2fs_ioc_commit_atomic_write(filp); 4647 case F2FS_IOC_ABORT_ATOMIC_WRITE: 4648 return f2fs_ioc_abort_atomic_write(filp); 4649 case F2FS_IOC_START_VOLATILE_WRITE: 4650 case F2FS_IOC_RELEASE_VOLATILE_WRITE: 4651 return -EOPNOTSUPP; 4652 case F2FS_IOC_SHUTDOWN: 4653 return f2fs_ioc_shutdown(filp, arg); 4654 case FITRIM: 4655 return f2fs_ioc_fitrim(filp, arg); 4656 case FS_IOC_SET_ENCRYPTION_POLICY: 4657 return f2fs_ioc_set_encryption_policy(filp, arg); 4658 case FS_IOC_GET_ENCRYPTION_POLICY: 4659 return f2fs_ioc_get_encryption_policy(filp, arg); 4660 case FS_IOC_GET_ENCRYPTION_PWSALT: 4661 return f2fs_ioc_get_encryption_pwsalt(filp, arg); 4662 case FS_IOC_GET_ENCRYPTION_POLICY_EX: 4663 return f2fs_ioc_get_encryption_policy_ex(filp, arg); 4664 case FS_IOC_ADD_ENCRYPTION_KEY: 4665 return f2fs_ioc_add_encryption_key(filp, arg); 4666 case FS_IOC_REMOVE_ENCRYPTION_KEY: 4667 return f2fs_ioc_remove_encryption_key(filp, arg); 4668 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: 4669 return f2fs_ioc_remove_encryption_key_all_users(filp, arg); 4670 case FS_IOC_GET_ENCRYPTION_KEY_STATUS: 4671 return f2fs_ioc_get_encryption_key_status(filp, arg); 4672 case FS_IOC_GET_ENCRYPTION_NONCE: 4673 return f2fs_ioc_get_encryption_nonce(filp, arg); 4674 case F2FS_IOC_GARBAGE_COLLECT: 4675 return f2fs_ioc_gc(filp, arg); 4676 case F2FS_IOC_GARBAGE_COLLECT_RANGE: 4677 return f2fs_ioc_gc_range(filp, arg); 4678 case F2FS_IOC_WRITE_CHECKPOINT: 4679 return f2fs_ioc_write_checkpoint(filp); 4680 case F2FS_IOC_DEFRAGMENT: 4681 return f2fs_ioc_defragment(filp, arg); 4682 case F2FS_IOC_MOVE_RANGE: 4683 return f2fs_ioc_move_range(filp, arg); 4684 case F2FS_IOC_FLUSH_DEVICE: 4685 return f2fs_ioc_flush_device(filp, arg); 4686 case F2FS_IOC_GET_FEATURES: 4687 return f2fs_ioc_get_features(filp, arg); 4688 case F2FS_IOC_GET_PIN_FILE: 4689 return f2fs_ioc_get_pin_file(filp, arg); 4690 case F2FS_IOC_SET_PIN_FILE: 4691 return f2fs_ioc_set_pin_file(filp, arg); 4692 case F2FS_IOC_PRECACHE_EXTENTS: 4693 return f2fs_ioc_precache_extents(filp); 4694 case F2FS_IOC_RESIZE_FS: 4695 return f2fs_ioc_resize_fs(filp, arg); 4696 case FS_IOC_ENABLE_VERITY: 4697 return f2fs_ioc_enable_verity(filp, arg); 4698 case FS_IOC_MEASURE_VERITY: 4699 return f2fs_ioc_measure_verity(filp, arg); 4700 case FS_IOC_READ_VERITY_METADATA: 4701 return f2fs_ioc_read_verity_metadata(filp, arg); 4702 case FS_IOC_GETFSLABEL: 4703 return f2fs_ioc_getfslabel(filp, arg); 4704 case FS_IOC_SETFSLABEL: 4705 return f2fs_ioc_setfslabel(filp, arg); 4706 case F2FS_IOC_GET_COMPRESS_BLOCKS: 4707 return f2fs_ioc_get_compress_blocks(filp, arg); 4708 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: 4709 return f2fs_release_compress_blocks(filp, arg); 4710 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: 4711 return f2fs_reserve_compress_blocks(filp, arg); 4712 case F2FS_IOC_SEC_TRIM_FILE: 4713 return f2fs_sec_trim_file(filp, arg); 4714 case F2FS_IOC_GET_COMPRESS_OPTION: 4715 return 
f2fs_ioc_get_compress_option(filp, arg); 4716 case F2FS_IOC_SET_COMPRESS_OPTION: 4717 return f2fs_ioc_set_compress_option(filp, arg); 4718 case F2FS_IOC_DECOMPRESS_FILE: 4719 return f2fs_ioc_decompress_file(filp); 4720 case F2FS_IOC_COMPRESS_FILE: 4721 return f2fs_ioc_compress_file(filp); 4722 case F2FS_IOC_GET_DEV_ALIAS_FILE: 4723 return f2fs_ioc_get_dev_alias_file(filp, arg); 4724 case F2FS_IOC_IO_PRIO: 4725 return f2fs_ioc_io_prio(filp, arg); 4726 default: 4727 return -ENOTTY; 4728 } 4729 } 4730 4731 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 4732 { 4733 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) 4734 return -EIO; 4735 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp)))) 4736 return -ENOSPC; 4737 4738 return __f2fs_ioctl(filp, cmd, arg); 4739 } 4740 4741 /* 4742 * Return %true if the given read or write request should use direct I/O, or 4743 * %false if it should use buffered I/O. 4744 */ 4745 static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb, 4746 struct iov_iter *iter) 4747 { 4748 unsigned int align; 4749 4750 if (!(iocb->ki_flags & IOCB_DIRECT)) 4751 return false; 4752 4753 if (f2fs_force_buffered_io(inode, iov_iter_rw(iter))) 4754 return false; 4755 4756 /* 4757 * Direct I/O not aligned to the disk's logical_block_size will be 4758 * attempted, but will fail with -EINVAL. 4759 * 4760 * f2fs additionally requires that direct I/O be aligned to the 4761 * filesystem block size, which is often a stricter requirement. 4762 * However, f2fs traditionally falls back to buffered I/O on requests 4763 * that are logical_block_size-aligned but not fs-block aligned. 4764 * 4765 * The below logic implements this behavior. 4766 */ 4767 align = iocb->ki_pos | iov_iter_alignment(iter); 4768 if (!IS_ALIGNED(align, i_blocksize(inode)) && 4769 IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev))) 4770 return false; 4771 4772 return true; 4773 } 4774 4775 static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error, 4776 unsigned int flags) 4777 { 4778 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); 4779 4780 dec_page_count(sbi, F2FS_DIO_READ); 4781 if (error) 4782 return error; 4783 f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size); 4784 return 0; 4785 } 4786 4787 static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = { 4788 .end_io = f2fs_dio_read_end_io, 4789 }; 4790 4791 static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) 4792 { 4793 struct file *file = iocb->ki_filp; 4794 struct inode *inode = file_inode(file); 4795 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4796 struct f2fs_inode_info *fi = F2FS_I(inode); 4797 const loff_t pos = iocb->ki_pos; 4798 const size_t count = iov_iter_count(to); 4799 struct iomap_dio *dio; 4800 ssize_t ret; 4801 4802 if (count == 0) 4803 return 0; /* skip atime update */ 4804 4805 trace_f2fs_direct_IO_enter(inode, iocb, count, READ); 4806 4807 if (iocb->ki_flags & IOCB_NOWAIT) { 4808 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { 4809 ret = -EAGAIN; 4810 goto out; 4811 } 4812 } else { 4813 f2fs_down_read(&fi->i_gc_rwsem[READ]); 4814 } 4815 4816 /* dio is not compatible w/ atomic file */ 4817 if (f2fs_is_atomic_file(inode)) { 4818 f2fs_up_read(&fi->i_gc_rwsem[READ]); 4819 ret = -EOPNOTSUPP; 4820 goto out; 4821 } 4822 4823 /* 4824 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of 4825 * the higher-level function iomap_dio_rw() in order to ensure that the 4826 * F2FS_DIO_READ counter will be 
decremented correctly in all cases. 4827 */ 4828 inc_page_count(sbi, F2FS_DIO_READ); 4829 dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops, 4830 &f2fs_iomap_dio_read_ops, 0, NULL, 0); 4831 if (IS_ERR_OR_NULL(dio)) { 4832 ret = PTR_ERR_OR_ZERO(dio); 4833 if (ret != -EIOCBQUEUED) 4834 dec_page_count(sbi, F2FS_DIO_READ); 4835 } else { 4836 ret = iomap_dio_complete(dio); 4837 } 4838 4839 f2fs_up_read(&fi->i_gc_rwsem[READ]); 4840 4841 file_accessed(file); 4842 out: 4843 trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret); 4844 return ret; 4845 } 4846 4847 static void f2fs_trace_rw_file_path(struct file *file, loff_t pos, size_t count, 4848 int rw) 4849 { 4850 struct inode *inode = file_inode(file); 4851 char *buf, *path; 4852 4853 buf = f2fs_getname(F2FS_I_SB(inode)); 4854 if (!buf) 4855 return; 4856 path = dentry_path_raw(file_dentry(file), buf, PATH_MAX); 4857 if (IS_ERR(path)) 4858 goto free_buf; 4859 if (rw == WRITE) 4860 trace_f2fs_datawrite_start(inode, pos, count, 4861 current->pid, path, current->comm); 4862 else 4863 trace_f2fs_dataread_start(inode, pos, count, 4864 current->pid, path, current->comm); 4865 free_buf: 4866 f2fs_putname(buf); 4867 } 4868 4869 static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 4870 { 4871 struct inode *inode = file_inode(iocb->ki_filp); 4872 const loff_t pos = iocb->ki_pos; 4873 ssize_t ret; 4874 bool dio; 4875 4876 if (!f2fs_is_compress_backend_ready(inode)) 4877 return -EOPNOTSUPP; 4878 4879 if (trace_f2fs_dataread_start_enabled()) 4880 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, 4881 iov_iter_count(to), READ); 4882 4883 dio = f2fs_should_use_dio(inode, iocb, to); 4884 4885 /* In LFS mode, if there is inflight dio, wait for its completion */ 4886 if (f2fs_lfs_mode(F2FS_I_SB(inode)) && 4887 get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && 4888 (!f2fs_is_pinned_file(inode) || !dio)) 4889 inode_dio_wait(inode); 4890 4891 if (dio) { 4892 ret = f2fs_dio_read_iter(iocb, to); 4893 } else { 4894 ret = filemap_read(iocb, to, 0); 4895 if (ret > 0) 4896 f2fs_update_iostat(F2FS_I_SB(inode), inode, 4897 APP_BUFFERED_READ_IO, ret); 4898 } 4899 trace_f2fs_dataread_end(inode, pos, ret); 4900 return ret; 4901 } 4902 4903 static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos, 4904 struct pipe_inode_info *pipe, 4905 size_t len, unsigned int flags) 4906 { 4907 struct inode *inode = file_inode(in); 4908 const loff_t pos = *ppos; 4909 ssize_t ret; 4910 4911 if (!f2fs_is_compress_backend_ready(inode)) 4912 return -EOPNOTSUPP; 4913 4914 if (trace_f2fs_dataread_start_enabled()) 4915 f2fs_trace_rw_file_path(in, pos, len, READ); 4916 4917 ret = filemap_splice_read(in, ppos, pipe, len, flags); 4918 if (ret > 0) 4919 f2fs_update_iostat(F2FS_I_SB(inode), inode, 4920 APP_BUFFERED_READ_IO, ret); 4921 4922 trace_f2fs_dataread_end(inode, pos, ret); 4923 return ret; 4924 } 4925 4926 static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from) 4927 { 4928 struct file *file = iocb->ki_filp; 4929 struct inode *inode = file_inode(file); 4930 ssize_t count; 4931 int err; 4932 4933 if (IS_IMMUTABLE(inode)) 4934 return -EPERM; 4935 4936 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) 4937 return -EPERM; 4938 4939 count = generic_write_checks(iocb, from); 4940 if (count <= 0) 4941 return count; 4942 4943 err = file_modified(file); 4944 if (err) 4945 return err; 4946 4947 f2fs_zero_post_eof_page(inode, 4948 iocb->ki_pos + iov_iter_count(from), true); 4949 return count; 4950 } 4951 4952 /* 4953 * Preallocate blocks for a write 
request, if it is possible and helpful to do 4954 * so. Returns a positive number if blocks may have been preallocated, 0 if no 4955 * blocks were preallocated, or a negative errno value if something went 4956 * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the 4957 * requested blocks (not just some of them) have been allocated. 4958 */ 4959 static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, 4960 bool dio) 4961 { 4962 struct inode *inode = file_inode(iocb->ki_filp); 4963 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4964 const loff_t pos = iocb->ki_pos; 4965 const size_t count = iov_iter_count(iter); 4966 struct f2fs_map_blocks map = {}; 4967 int flag; 4968 int ret; 4969 4970 /* If it will be an out-of-place direct write, don't bother. */ 4971 if (dio && f2fs_lfs_mode(sbi)) 4972 return 0; 4973 /* 4974 * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into 4975 * buffered IO, if DIO meets any holes. 4976 */ 4977 if (dio && i_size_read(inode) && 4978 (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode)))) 4979 return 0; 4980 4981 /* No-wait I/O can't allocate blocks. */ 4982 if (iocb->ki_flags & IOCB_NOWAIT) 4983 return 0; 4984 4985 /* If it will be a short write, don't bother. */ 4986 if (fault_in_iov_iter_readable(iter, count)) 4987 return 0; 4988 4989 if (f2fs_has_inline_data(inode)) { 4990 /* If the data will fit inline, don't bother. */ 4991 if (pos + count <= MAX_INLINE_DATA(inode)) 4992 return 0; 4993 ret = f2fs_convert_inline_inode(inode); 4994 if (ret) 4995 return ret; 4996 } 4997 4998 /* Do not preallocate blocks that will be written partially in 4KB. */ 4999 map.m_lblk = F2FS_BLK_ALIGN(pos); 5000 map.m_len = F2FS_BYTES_TO_BLK(pos + count); 5001 if (map.m_len > map.m_lblk) 5002 map.m_len -= map.m_lblk; 5003 else 5004 return 0; 5005 5006 if (!IS_DEVICE_ALIASING(inode)) 5007 map.m_may_create = true; 5008 if (dio) { 5009 map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, 5010 inode->i_write_hint); 5011 flag = F2FS_GET_BLOCK_PRE_DIO; 5012 } else { 5013 map.m_seg_type = NO_CHECK_TYPE; 5014 flag = F2FS_GET_BLOCK_PRE_AIO; 5015 } 5016 5017 ret = f2fs_map_blocks(inode, &map, flag); 5018 /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. 
*/ 5019 if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0)) 5020 return ret; 5021 if (ret == 0) 5022 set_inode_flag(inode, FI_PREALLOCATED_ALL); 5023 return map.m_len; 5024 } 5025 5026 static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb, 5027 struct iov_iter *from) 5028 { 5029 struct file *file = iocb->ki_filp; 5030 struct inode *inode = file_inode(file); 5031 ssize_t ret; 5032 5033 if (iocb->ki_flags & IOCB_NOWAIT) 5034 return -EOPNOTSUPP; 5035 5036 ret = generic_perform_write(iocb, from); 5037 5038 if (ret > 0) { 5039 f2fs_update_iostat(F2FS_I_SB(inode), inode, 5040 APP_BUFFERED_IO, ret); 5041 } 5042 return ret; 5043 } 5044 5045 static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, 5046 unsigned int flags) 5047 { 5048 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); 5049 5050 dec_page_count(sbi, F2FS_DIO_WRITE); 5051 if (error) 5052 return error; 5053 f2fs_update_time(sbi, REQ_TIME); 5054 f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size); 5055 return 0; 5056 } 5057 5058 static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, 5059 struct bio *bio, loff_t file_offset) 5060 { 5061 struct inode *inode = iter->inode; 5062 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 5063 enum log_type type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint); 5064 enum temp_type temp = f2fs_get_segment_temp(sbi, type); 5065 5066 bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp); 5067 blk_crypto_submit_bio(bio); 5068 } 5069 5070 static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = { 5071 .end_io = f2fs_dio_write_end_io, 5072 .submit_io = f2fs_dio_write_submit_io, 5073 }; 5074 5075 static void f2fs_flush_buffered_write(struct address_space *mapping, 5076 loff_t start_pos, loff_t end_pos) 5077 { 5078 int ret; 5079 5080 ret = filemap_write_and_wait_range(mapping, start_pos, end_pos); 5081 if (ret < 0) 5082 return; 5083 invalidate_mapping_pages(mapping, 5084 start_pos >> PAGE_SHIFT, 5085 end_pos >> PAGE_SHIFT); 5086 } 5087 5088 static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, 5089 bool *may_need_sync) 5090 { 5091 struct file *file = iocb->ki_filp; 5092 struct inode *inode = file_inode(file); 5093 struct f2fs_inode_info *fi = F2FS_I(inode); 5094 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 5095 const bool do_opu = f2fs_lfs_mode(sbi); 5096 const loff_t pos = iocb->ki_pos; 5097 const ssize_t count = iov_iter_count(from); 5098 unsigned int dio_flags; 5099 struct iomap_dio *dio; 5100 ssize_t ret; 5101 5102 trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE); 5103 5104 if (iocb->ki_flags & IOCB_NOWAIT) { 5105 /* f2fs_convert_inline_inode() and block allocation can block */ 5106 if (f2fs_has_inline_data(inode) || 5107 !f2fs_overwrite_io(inode, pos, count)) { 5108 ret = -EAGAIN; 5109 goto out; 5110 } 5111 5112 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) { 5113 ret = -EAGAIN; 5114 goto out; 5115 } 5116 if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { 5117 f2fs_up_read(&fi->i_gc_rwsem[WRITE]); 5118 ret = -EAGAIN; 5119 goto out; 5120 } 5121 } else { 5122 ret = f2fs_convert_inline_inode(inode); 5123 if (ret) 5124 goto out; 5125 5126 f2fs_down_read(&fi->i_gc_rwsem[WRITE]); 5127 if (do_opu) 5128 f2fs_down_read(&fi->i_gc_rwsem[READ]); 5129 } 5130 5131 /* 5132 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of 5133 * the higher-level function iomap_dio_rw() in order to ensure that the 5134 * F2FS_DIO_WRITE counter will be decremented correctly in all cases. 
5135 */ 5136 inc_page_count(sbi, F2FS_DIO_WRITE); 5137 dio_flags = 0; 5138 if (pos + count > inode->i_size) 5139 dio_flags |= IOMAP_DIO_FORCE_WAIT; 5140 dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops, 5141 &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0); 5142 if (IS_ERR_OR_NULL(dio)) { 5143 ret = PTR_ERR_OR_ZERO(dio); 5144 if (ret == -ENOTBLK) 5145 ret = 0; 5146 if (ret != -EIOCBQUEUED) 5147 dec_page_count(sbi, F2FS_DIO_WRITE); 5148 } else { 5149 ret = iomap_dio_complete(dio); 5150 } 5151 5152 if (do_opu) 5153 f2fs_up_read(&fi->i_gc_rwsem[READ]); 5154 f2fs_up_read(&fi->i_gc_rwsem[WRITE]); 5155 5156 if (ret < 0) 5157 goto out; 5158 if (pos + ret > inode->i_size) 5159 f2fs_i_size_write(inode, pos + ret); 5160 if (!do_opu) 5161 set_inode_flag(inode, FI_UPDATE_WRITE); 5162 5163 if (iov_iter_count(from)) { 5164 ssize_t ret2; 5165 loff_t bufio_start_pos = iocb->ki_pos; 5166 5167 /* 5168 * The direct write was partial, so we need to fall back to a 5169 * buffered write for the remainder. 5170 */ 5171 5172 ret2 = f2fs_buffered_write_iter(iocb, from); 5173 if (iov_iter_count(from)) 5174 f2fs_write_failed(inode, iocb->ki_pos); 5175 if (ret2 < 0) 5176 goto out; 5177 5178 /* 5179 * Ensure that the pagecache pages are written to disk and 5180 * invalidated to preserve the expected O_DIRECT semantics. 5181 */ 5182 if (ret2 > 0) { 5183 loff_t bufio_end_pos = bufio_start_pos + ret2 - 1; 5184 5185 ret += ret2; 5186 5187 f2fs_flush_buffered_write(file->f_mapping, 5188 bufio_start_pos, 5189 bufio_end_pos); 5190 } 5191 } else { 5192 /* iomap_dio_rw() already handled the generic_write_sync(). */ 5193 *may_need_sync = false; 5194 } 5195 out: 5196 trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret); 5197 return ret; 5198 } 5199 5200 static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 5201 { 5202 struct inode *inode = file_inode(iocb->ki_filp); 5203 const loff_t orig_pos = iocb->ki_pos; 5204 const size_t orig_count = iov_iter_count(from); 5205 loff_t target_size; 5206 bool dio; 5207 bool may_need_sync = true; 5208 int preallocated; 5209 const loff_t pos = iocb->ki_pos; 5210 const ssize_t count = iov_iter_count(from); 5211 ssize_t ret; 5212 5213 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { 5214 ret = -EIO; 5215 goto out; 5216 } 5217 5218 if (!f2fs_is_compress_backend_ready(inode)) { 5219 ret = -EOPNOTSUPP; 5220 goto out; 5221 } 5222 5223 if (iocb->ki_flags & IOCB_NOWAIT) { 5224 if (!inode_trylock(inode)) { 5225 ret = -EAGAIN; 5226 goto out; 5227 } 5228 } else { 5229 inode_lock(inode); 5230 } 5231 5232 if (f2fs_is_pinned_file(inode) && 5233 !f2fs_overwrite_io(inode, pos, count)) { 5234 ret = -EIO; 5235 goto out_unlock; 5236 } 5237 5238 ret = f2fs_write_checks(iocb, from); 5239 if (ret <= 0) 5240 goto out_unlock; 5241 5242 /* Determine whether we will do a direct write or a buffered write. */ 5243 dio = f2fs_should_use_dio(inode, iocb, from); 5244 5245 /* dio is not compatible w/ atomic write */ 5246 if (dio && f2fs_is_atomic_file(inode)) { 5247 ret = -EOPNOTSUPP; 5248 goto out_unlock; 5249 } 5250 5251 /* Possibly preallocate the blocks for the write. */ 5252 target_size = iocb->ki_pos + iov_iter_count(from); 5253 preallocated = f2fs_preallocate_blocks(iocb, from, dio); 5254 if (preallocated < 0) { 5255 ret = preallocated; 5256 } else { 5257 if (trace_f2fs_datawrite_start_enabled()) 5258 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, 5259 orig_count, WRITE); 5260 5261 /* Do the actual write. */ 5262 ret = dio ? 
5263 f2fs_dio_write_iter(iocb, from, &may_need_sync) : 5264 f2fs_buffered_write_iter(iocb, from); 5265 5266 trace_f2fs_datawrite_end(inode, orig_pos, ret); 5267 } 5268 5269 /* Don't leave any preallocated blocks around past i_size. */ 5270 if (preallocated && i_size_read(inode) < target_size) { 5271 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 5272 filemap_invalidate_lock(inode->i_mapping); 5273 if (!f2fs_truncate(inode)) 5274 file_dont_truncate(inode); 5275 filemap_invalidate_unlock(inode->i_mapping); 5276 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 5277 } else { 5278 file_dont_truncate(inode); 5279 } 5280 5281 clear_inode_flag(inode, FI_PREALLOCATED_ALL); 5282 out_unlock: 5283 inode_unlock(inode); 5284 out: 5285 trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret); 5286 5287 if (ret > 0 && may_need_sync) 5288 ret = generic_write_sync(iocb, ret); 5289 5290 /* If buffered IO was forced, flush and drop the data from 5291 * the page cache to preserve O_DIRECT semantics 5292 */ 5293 if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT)) 5294 f2fs_flush_buffered_write(iocb->ki_filp->f_mapping, 5295 orig_pos, 5296 orig_pos + ret - 1); 5297 5298 return ret; 5299 } 5300 5301 static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len, 5302 int advice) 5303 { 5304 struct address_space *mapping; 5305 struct backing_dev_info *bdi; 5306 struct inode *inode = file_inode(filp); 5307 int err; 5308 5309 trace_f2fs_fadvise(inode, offset, len, advice); 5310 5311 if (advice == POSIX_FADV_SEQUENTIAL) { 5312 if (S_ISFIFO(inode->i_mode)) 5313 return -ESPIPE; 5314 5315 mapping = filp->f_mapping; 5316 if (!mapping || len < 0) 5317 return -EINVAL; 5318 5319 bdi = inode_to_bdi(mapping->host); 5320 filp->f_ra.ra_pages = bdi->ra_pages * 5321 F2FS_I_SB(inode)->seq_file_ra_mul; 5322 spin_lock(&filp->f_lock); 5323 filp->f_mode &= ~FMODE_RANDOM; 5324 spin_unlock(&filp->f_lock); 5325 return 0; 5326 } else if (advice == POSIX_FADV_WILLNEED && offset == 0) { 5327 /* Load extent cache at the first readahead. 
*/ 5328 f2fs_precache_extents(inode); 5329 } 5330 5331 err = generic_fadvise(filp, offset, len, advice); 5332 if (err) 5333 return err; 5334 5335 if (advice == POSIX_FADV_DONTNEED && 5336 (test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) && 5337 f2fs_compressed_file(inode))) 5338 f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino); 5339 else if (advice == POSIX_FADV_NOREUSE) 5340 err = f2fs_keep_noreuse_range(inode, offset, len); 5341 return err; 5342 } 5343 5344 #ifdef CONFIG_COMPAT 5345 struct compat_f2fs_gc_range { 5346 u32 sync; 5347 compat_u64 start; 5348 compat_u64 len; 5349 }; 5350 #define F2FS_IOC32_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11,\ 5351 struct compat_f2fs_gc_range) 5352 5353 static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg) 5354 { 5355 struct compat_f2fs_gc_range __user *urange; 5356 struct f2fs_gc_range range; 5357 int err; 5358 5359 urange = compat_ptr(arg); 5360 err = get_user(range.sync, &urange->sync); 5361 err |= get_user(range.start, &urange->start); 5362 err |= get_user(range.len, &urange->len); 5363 if (err) 5364 return -EFAULT; 5365 5366 return __f2fs_ioc_gc_range(file, &range); 5367 } 5368 5369 struct compat_f2fs_move_range { 5370 u32 dst_fd; 5371 compat_u64 pos_in; 5372 compat_u64 pos_out; 5373 compat_u64 len; 5374 }; 5375 #define F2FS_IOC32_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ 5376 struct compat_f2fs_move_range) 5377 5378 static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg) 5379 { 5380 struct compat_f2fs_move_range __user *urange; 5381 struct f2fs_move_range range; 5382 int err; 5383 5384 urange = compat_ptr(arg); 5385 err = get_user(range.dst_fd, &urange->dst_fd); 5386 err |= get_user(range.pos_in, &urange->pos_in); 5387 err |= get_user(range.pos_out, &urange->pos_out); 5388 err |= get_user(range.len, &urange->len); 5389 if (err) 5390 return -EFAULT; 5391 5392 return __f2fs_ioc_move_range(file, &range); 5393 } 5394 5395 long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 5396 { 5397 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) 5398 return -EIO; 5399 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file)))) 5400 return -ENOSPC; 5401 5402 switch (cmd) { 5403 case FS_IOC32_GETVERSION: 5404 cmd = FS_IOC_GETVERSION; 5405 break; 5406 case F2FS_IOC32_GARBAGE_COLLECT_RANGE: 5407 return f2fs_compat_ioc_gc_range(file, arg); 5408 case F2FS_IOC32_MOVE_RANGE: 5409 return f2fs_compat_ioc_move_range(file, arg); 5410 case F2FS_IOC_START_ATOMIC_WRITE: 5411 case F2FS_IOC_START_ATOMIC_REPLACE: 5412 case F2FS_IOC_COMMIT_ATOMIC_WRITE: 5413 case F2FS_IOC_START_VOLATILE_WRITE: 5414 case F2FS_IOC_RELEASE_VOLATILE_WRITE: 5415 case F2FS_IOC_ABORT_ATOMIC_WRITE: 5416 case F2FS_IOC_SHUTDOWN: 5417 case FITRIM: 5418 case FS_IOC_SET_ENCRYPTION_POLICY: 5419 case FS_IOC_GET_ENCRYPTION_PWSALT: 5420 case FS_IOC_GET_ENCRYPTION_POLICY: 5421 case FS_IOC_GET_ENCRYPTION_POLICY_EX: 5422 case FS_IOC_ADD_ENCRYPTION_KEY: 5423 case FS_IOC_REMOVE_ENCRYPTION_KEY: 5424 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: 5425 case FS_IOC_GET_ENCRYPTION_KEY_STATUS: 5426 case FS_IOC_GET_ENCRYPTION_NONCE: 5427 case F2FS_IOC_GARBAGE_COLLECT: 5428 case F2FS_IOC_WRITE_CHECKPOINT: 5429 case F2FS_IOC_DEFRAGMENT: 5430 case F2FS_IOC_FLUSH_DEVICE: 5431 case F2FS_IOC_GET_FEATURES: 5432 case F2FS_IOC_GET_PIN_FILE: 5433 case F2FS_IOC_SET_PIN_FILE: 5434 case F2FS_IOC_PRECACHE_EXTENTS: 5435 case F2FS_IOC_RESIZE_FS: 5436 case FS_IOC_ENABLE_VERITY: 5437 case FS_IOC_MEASURE_VERITY: 5438 case FS_IOC_READ_VERITY_METADATA: 5439 case 
FS_IOC_GETFSLABEL: 5440 case FS_IOC_SETFSLABEL: 5441 case F2FS_IOC_GET_COMPRESS_BLOCKS: 5442 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: 5443 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: 5444 case F2FS_IOC_SEC_TRIM_FILE: 5445 case F2FS_IOC_GET_COMPRESS_OPTION: 5446 case F2FS_IOC_SET_COMPRESS_OPTION: 5447 case F2FS_IOC_DECOMPRESS_FILE: 5448 case F2FS_IOC_COMPRESS_FILE: 5449 case F2FS_IOC_GET_DEV_ALIAS_FILE: 5450 case F2FS_IOC_IO_PRIO: 5451 break; 5452 default: 5453 return -ENOIOCTLCMD; 5454 } 5455 return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); 5456 } 5457 #endif 5458 5459 const struct file_operations f2fs_file_operations = { 5460 .llseek = f2fs_llseek, 5461 .read_iter = f2fs_file_read_iter, 5462 .write_iter = f2fs_file_write_iter, 5463 .iopoll = iocb_bio_iopoll, 5464 .open = f2fs_file_open, 5465 .release = f2fs_release_file, 5466 .mmap_prepare = f2fs_file_mmap_prepare, 5467 .flush = f2fs_file_flush, 5468 .fsync = f2fs_sync_file, 5469 .fallocate = f2fs_fallocate, 5470 .unlocked_ioctl = f2fs_ioctl, 5471 #ifdef CONFIG_COMPAT 5472 .compat_ioctl = f2fs_compat_ioctl, 5473 #endif 5474 .splice_read = f2fs_file_splice_read, 5475 .splice_write = iter_file_splice_write, 5476 .fadvise = f2fs_file_fadvise, 5477 .fop_flags = FOP_BUFFER_RASYNC, 5478 .setlease = generic_setlease, 5479 }; 5480
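/*
 * Userspace sketch (illustrative only, not part of this file): one way the
 * ioctls implemented above can be driven from an application.  It assumes
 * the uapi definitions exported through <linux/f2fs.h> (struct
 * f2fs_sectrim_range, struct f2fs_comp_option, the F2FS_TRIM_FILE_* flags
 * and the F2FS_IOC_* command numbers); the helper names and the minimal
 * error handling are hypothetical and chosen for brevity.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/f2fs.h>

/* Discard and then zero out every block backing an (uncompressed) file. */
static int sec_trim_whole_file(int fd)
{
	struct f2fs_sectrim_range range = {
		.start = 0,
		.len = (uint64_t)-1,	/* (u64)-1 means "to end of file" */
		.flags = F2FS_TRIM_FILE_DISCARD | F2FS_TRIM_FILE_ZEROOUT,
	};

	return ioctl(fd, F2FS_IOC_SEC_TRIM_FILE, &range);
}

/* Print the per-inode compression settings of a compressed file. */
static int show_compress_option(int fd)
{
	struct f2fs_comp_option option;

	if (ioctl(fd, F2FS_IOC_GET_COMPRESS_OPTION, &option) < 0)
		return -1;
	printf("algorithm=%u log_cluster_size=%u\n",
	       option.algorithm, option.log_cluster_size);
	return 0;
}

/*
 * Re-reserve the blocks that a previous F2FS_IOC_RELEASE_COMPRESS_BLOCKS
 * call returned to free space; the kernel reports how many blocks it
 * reserved back through the u64 pointed to by the ioctl argument.
 */
static int rereserve_compress_blocks(int fd)
{
	uint64_t reserved = 0;

	if (ioctl(fd, F2FS_IOC_RESERVE_COMPRESS_BLOCKS, &reserved) < 0)
		return -1;
	printf("reserved %llu blocks\n", (unsigned long long)reserved);
	return 0;
}

int main(int argc, char **argv)
{
	int fd, ret;

	if (argc < 3) {
		fprintf(stderr, "usage: %s <file> trim|compress\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	if (!strcmp(argv[2], "trim")) {
		ret = sec_trim_whole_file(fd);
	} else {
		ret = show_compress_option(fd);
		if (!ret)
			ret = rereserve_compress_blocks(fd);
	}

	if (ret < 0)
		perror("ioctl");
	return ret < 0;
}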