1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * fs/f2fs/file.c 4 * 5 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 6 * http://www.samsung.com/ 7 */ 8 #include <linux/blk-crypto.h> 9 #include <linux/fs.h> 10 #include <linux/f2fs_fs.h> 11 #include <linux/stat.h> 12 #include <linux/writeback.h> 13 #include <linux/blkdev.h> 14 #include <linux/falloc.h> 15 #include <linux/filelock.h> 16 #include <linux/types.h> 17 #include <linux/compat.h> 18 #include <linux/uaccess.h> 19 #include <linux/mount.h> 20 #include <linux/pagevec.h> 21 #include <linux/uio.h> 22 #include <linux/uuid.h> 23 #include <linux/file.h> 24 #include <linux/nls.h> 25 #include <linux/sched/signal.h> 26 #include <linux/fileattr.h> 27 #include <linux/fadvise.h> 28 #include <linux/iomap.h> 29 30 #include "f2fs.h" 31 #include "node.h" 32 #include "segment.h" 33 #include "xattr.h" 34 #include "acl.h" 35 #include "gc.h" 36 #include "iostat.h" 37 #include <trace/events/f2fs.h> 38 #include <uapi/linux/f2fs.h> 39 40 static void f2fs_zero_post_eof_page(struct inode *inode, 41 loff_t new_size, bool lock) 42 { 43 loff_t old_size = i_size_read(inode); 44 45 if (old_size >= new_size) 46 return; 47 48 if (mapping_empty(inode->i_mapping)) 49 return; 50 51 if (lock) 52 filemap_invalidate_lock(inode->i_mapping); 53 /* zero or drop pages only in range of [old_size, new_size] */ 54 truncate_inode_pages_range(inode->i_mapping, old_size, new_size); 55 if (lock) 56 filemap_invalidate_unlock(inode->i_mapping); 57 } 58 59 static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) 60 { 61 struct inode *inode = file_inode(vmf->vma->vm_file); 62 vm_flags_t flags = vmf->vma->vm_flags; 63 vm_fault_t ret; 64 65 ret = filemap_fault(vmf); 66 if (ret & VM_FAULT_LOCKED) 67 f2fs_update_iostat(F2FS_I_SB(inode), inode, 68 APP_MAPPED_READ_IO, F2FS_BLKSIZE); 69 70 trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret); 71 72 return ret; 73 } 74 75 static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) 76 { 77 struct folio *folio = 
page_folio(vmf->page); 78 struct inode *inode = file_inode(vmf->vma->vm_file); 79 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 80 struct dnode_of_data dn; 81 bool need_alloc = !f2fs_is_pinned_file(inode); 82 int err = 0; 83 vm_fault_t ret; 84 85 if (unlikely(IS_IMMUTABLE(inode))) 86 return VM_FAULT_SIGBUS; 87 88 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 89 err = -EIO; 90 goto out; 91 } 92 93 if (unlikely(f2fs_cp_error(sbi))) { 94 err = -EIO; 95 goto out; 96 } 97 98 if (!f2fs_is_checkpoint_ready(sbi)) { 99 err = -ENOSPC; 100 goto out; 101 } 102 103 err = f2fs_convert_inline_inode(inode); 104 if (err) 105 goto out; 106 107 #ifdef CONFIG_F2FS_FS_COMPRESSION 108 if (f2fs_compressed_file(inode)) { 109 int ret = f2fs_is_compressed_cluster(inode, folio->index); 110 111 if (ret < 0) { 112 err = ret; 113 goto out; 114 } else if (ret) { 115 need_alloc = false; 116 } 117 } 118 #endif 119 /* should do out of any locked page */ 120 if (need_alloc) 121 f2fs_balance_fs(sbi, true); 122 123 sb_start_pagefault(inode->i_sb); 124 125 f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); 126 127 f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT, true); 128 129 file_update_time(vmf->vma->vm_file); 130 filemap_invalidate_lock_shared(inode->i_mapping); 131 132 folio_lock(folio); 133 if (unlikely(folio->mapping != inode->i_mapping || 134 folio_pos(folio) > i_size_read(inode) || 135 !folio_test_uptodate(folio))) { 136 folio_unlock(folio); 137 err = -EFAULT; 138 goto out_sem; 139 } 140 141 set_new_dnode(&dn, inode, NULL, NULL, 0); 142 if (need_alloc) { 143 /* block allocation */ 144 err = f2fs_get_block_locked(&dn, folio->index); 145 } else { 146 err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); 147 f2fs_put_dnode(&dn); 148 if (f2fs_is_pinned_file(inode) && 149 !__is_valid_data_blkaddr(dn.data_blkaddr)) 150 err = -EIO; 151 } 152 153 if (err) { 154 folio_unlock(folio); 155 goto out_sem; 156 } 157 158 f2fs_folio_wait_writeback(folio, DATA, false, true); 159 160 
/* wait for GCed page writeback via META_MAPPING */ 161 f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); 162 163 /* 164 * check to see if the page is mapped already (no holes) 165 */ 166 if (folio_test_mappedtodisk(folio)) 167 goto out_sem; 168 169 /* page is wholly or partially inside EOF */ 170 if (((loff_t)(folio->index + 1) << PAGE_SHIFT) > 171 i_size_read(inode)) { 172 loff_t offset; 173 174 offset = i_size_read(inode) & ~PAGE_MASK; 175 folio_zero_segment(folio, offset, folio_size(folio)); 176 } 177 folio_mark_dirty(folio); 178 179 f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE); 180 f2fs_update_time(sbi, REQ_TIME); 181 182 out_sem: 183 filemap_invalidate_unlock_shared(inode->i_mapping); 184 185 sb_end_pagefault(inode->i_sb); 186 out: 187 ret = vmf_fs_error(err); 188 189 trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret); 190 return ret; 191 } 192 193 static const struct vm_operations_struct f2fs_file_vm_ops = { 194 .fault = f2fs_filemap_fault, 195 .map_pages = filemap_map_pages, 196 .page_mkwrite = f2fs_vm_page_mkwrite, 197 }; 198 199 static int get_parent_ino(struct inode *inode, nid_t *pino) 200 { 201 struct dentry *dentry; 202 203 /* 204 * Make sure to get the non-deleted alias. The alias associated with 205 * the open file descriptor being fsync()'ed may be deleted already. 
206 */ 207 dentry = d_find_alias(inode); 208 if (!dentry) 209 return 0; 210 211 *pino = d_parent_ino(dentry); 212 dput(dentry); 213 return 1; 214 } 215 216 static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) 217 { 218 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 219 enum cp_reason_type cp_reason = CP_NO_NEEDED; 220 221 if (!S_ISREG(inode->i_mode)) 222 cp_reason = CP_NON_REGULAR; 223 else if (f2fs_compressed_file(inode)) 224 cp_reason = CP_COMPRESSED; 225 else if (inode->i_nlink != 1) 226 cp_reason = CP_HARDLINK; 227 else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) 228 cp_reason = CP_SB_NEED_CP; 229 else if (file_wrong_pino(inode)) 230 cp_reason = CP_WRONG_PINO; 231 else if (!f2fs_space_for_roll_forward(sbi)) 232 cp_reason = CP_NO_SPC_ROLL; 233 else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) 234 cp_reason = CP_NODE_NEED_CP; 235 else if (test_opt(sbi, FASTBOOT)) 236 cp_reason = CP_FASTBOOT_MODE; 237 else if (F2FS_OPTION(sbi).active_logs == 2) 238 cp_reason = CP_SPEC_LOG_NUM; 239 else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && 240 f2fs_need_dentry_mark(sbi, inode->i_ino) && 241 f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, 242 TRANS_DIR_INO)) 243 cp_reason = CP_RECOVER_DIR; 244 else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, 245 XATTR_DIR_INO)) 246 cp_reason = CP_XATTR_DIR; 247 248 return cp_reason; 249 } 250 251 static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) 252 { 253 struct folio *i = filemap_get_folio(NODE_MAPPING(sbi), ino); 254 bool ret = false; 255 /* But we need to avoid that there are some inode updates */ 256 if ((!IS_ERR(i) && folio_test_dirty(i)) || 257 f2fs_need_inode_block_update(sbi, ino)) 258 ret = true; 259 f2fs_folio_put(i, false); 260 return ret; 261 } 262 263 static void try_to_fix_pino(struct inode *inode) 264 { 265 struct f2fs_inode_info *fi = F2FS_I(inode); 266 nid_t pino; 267 268 f2fs_down_write(&fi->i_sem); 269 if (file_wrong_pino(inode) && inode->i_nlink 
== 1 && 270 get_parent_ino(inode, &pino)) { 271 f2fs_i_pino_write(inode, pino); 272 file_got_pino(inode); 273 } 274 f2fs_up_write(&fi->i_sem); 275 } 276 277 static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, 278 int datasync, bool atomic) 279 { 280 struct inode *inode = file->f_mapping->host; 281 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 282 nid_t ino = inode->i_ino; 283 int ret = 0; 284 enum cp_reason_type cp_reason = 0; 285 struct writeback_control wbc = { 286 .sync_mode = WB_SYNC_ALL, 287 .nr_to_write = LONG_MAX, 288 }; 289 unsigned int seq_id = 0; 290 291 if (unlikely(f2fs_readonly(inode->i_sb))) 292 return 0; 293 294 trace_f2fs_sync_file_enter(inode); 295 296 if (S_ISDIR(inode->i_mode)) 297 goto go_write; 298 299 /* if fdatasync is triggered, let's do in-place-update */ 300 if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) 301 set_inode_flag(inode, FI_NEED_IPU); 302 ret = file_write_and_wait_range(file, start, end); 303 clear_inode_flag(inode, FI_NEED_IPU); 304 305 if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { 306 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); 307 return ret; 308 } 309 310 /* if the inode is dirty, let's recover all the time */ 311 if (!f2fs_skip_inode_update(inode, datasync)) { 312 f2fs_write_inode(inode, NULL); 313 goto go_write; 314 } 315 316 /* 317 * if there is no written data, don't waste time to write recovery info. 318 */ 319 if (!is_inode_flag_set(inode, FI_APPEND_WRITE) && 320 !f2fs_exist_written_data(sbi, ino, APPEND_INO)) { 321 322 /* it may call write_inode just prior to fsync */ 323 if (need_inode_page_update(sbi, ino)) 324 goto go_write; 325 326 if (is_inode_flag_set(inode, FI_UPDATE_WRITE) || 327 f2fs_exist_written_data(sbi, ino, UPDATE_INO)) 328 goto flush_out; 329 goto out; 330 } else { 331 /* 332 * for OPU case, during fsync(), node can be persisted before 333 * data when lower device doesn't support write barrier, result 334 * in data corruption after SPO. 
335 * So for strict fsync mode, force to use atomic write semantics 336 * to keep write order in between data/node and last node to 337 * avoid potential data corruption. 338 */ 339 if (F2FS_OPTION(sbi).fsync_mode == 340 FSYNC_MODE_STRICT && !atomic) 341 atomic = true; 342 } 343 go_write: 344 /* 345 * Both of fdatasync() and fsync() are able to be recovered from 346 * sudden-power-off. 347 */ 348 f2fs_down_read(&F2FS_I(inode)->i_sem); 349 cp_reason = need_do_checkpoint(inode); 350 f2fs_up_read(&F2FS_I(inode)->i_sem); 351 352 if (cp_reason) { 353 /* all the dirty node pages should be flushed for POR */ 354 ret = f2fs_sync_fs(inode->i_sb, 1); 355 356 /* 357 * We've secured consistency through sync_fs. Following pino 358 * will be used only for fsynced inodes after checkpoint. 359 */ 360 try_to_fix_pino(inode); 361 clear_inode_flag(inode, FI_APPEND_WRITE); 362 clear_inode_flag(inode, FI_UPDATE_WRITE); 363 goto out; 364 } 365 sync_nodes: 366 atomic_inc(&sbi->wb_sync_req[NODE]); 367 ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id); 368 atomic_dec(&sbi->wb_sync_req[NODE]); 369 if (ret) 370 goto out; 371 372 /* if cp_error was enabled, we should avoid infinite loop */ 373 if (unlikely(f2fs_cp_error(sbi))) { 374 ret = -EIO; 375 goto out; 376 } 377 378 if (f2fs_need_inode_block_update(sbi, ino)) { 379 f2fs_mark_inode_dirty_sync(inode, true); 380 f2fs_write_inode(inode, NULL); 381 goto sync_nodes; 382 } 383 384 /* 385 * If it's atomic_write, it's just fine to keep write ordering. So 386 * here we don't need to wait for node write completion, since we use 387 * node chain which serializes node blocks. If one of node writes are 388 * reordered, we can see simply broken chain, resulting in stopping 389 * roll-forward recovery. It means we'll recover all or none node blocks 390 * given fsync mark. 
391 */ 392 if (!atomic) { 393 ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id); 394 if (ret) 395 goto out; 396 } 397 398 /* once recovery info is written, don't need to tack this */ 399 f2fs_remove_ino_entry(sbi, ino, APPEND_INO); 400 clear_inode_flag(inode, FI_APPEND_WRITE); 401 flush_out: 402 if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) 403 ret = f2fs_issue_flush(sbi, inode->i_ino); 404 if (!ret) { 405 f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); 406 clear_inode_flag(inode, FI_UPDATE_WRITE); 407 f2fs_remove_ino_entry(sbi, ino, FLUSH_INO); 408 } 409 f2fs_update_time(sbi, REQ_TIME); 410 out: 411 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); 412 return ret; 413 } 414 415 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 416 { 417 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) 418 return -EIO; 419 return f2fs_do_sync_file(file, start, end, datasync, false); 420 } 421 422 static bool __found_offset(struct address_space *mapping, 423 struct dnode_of_data *dn, pgoff_t index, int whence) 424 { 425 block_t blkaddr = f2fs_data_blkaddr(dn); 426 struct inode *inode = mapping->host; 427 bool compressed_cluster = false; 428 429 if (f2fs_compressed_file(inode)) { 430 block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_folio, 431 ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size)); 432 433 compressed_cluster = first_blkaddr == COMPRESS_ADDR; 434 } 435 436 switch (whence) { 437 case SEEK_DATA: 438 if (__is_valid_data_blkaddr(blkaddr)) 439 return true; 440 if (blkaddr == NEW_ADDR && 441 xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY)) 442 return true; 443 if (compressed_cluster) 444 return true; 445 break; 446 case SEEK_HOLE: 447 if (compressed_cluster) 448 return false; 449 if (blkaddr == NULL_ADDR) 450 return true; 451 break; 452 } 453 return false; 454 } 455 456 static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) 457 { 458 struct inode *inode = 
file->f_mapping->host; 459 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 460 struct dnode_of_data dn; 461 pgoff_t pgofs, end_offset; 462 loff_t data_ofs = offset; 463 loff_t isize; 464 int err = 0; 465 466 inode_lock_shared(inode); 467 468 isize = i_size_read(inode); 469 if (offset >= isize) 470 goto fail; 471 472 /* handle inline data case */ 473 if (f2fs_has_inline_data(inode)) { 474 if (whence == SEEK_HOLE) { 475 data_ofs = isize; 476 goto found; 477 } else if (whence == SEEK_DATA) { 478 data_ofs = offset; 479 goto found; 480 } 481 } 482 483 pgofs = (pgoff_t)(offset >> PAGE_SHIFT); 484 485 for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) { 486 set_new_dnode(&dn, inode, NULL, NULL, 0); 487 err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); 488 if (err && err != -ENOENT) { 489 goto fail; 490 } else if (err == -ENOENT) { 491 /* direct node does not exists */ 492 if (whence == SEEK_DATA) { 493 pgofs = f2fs_get_next_page_offset(&dn, pgofs); 494 continue; 495 } else { 496 goto found; 497 } 498 } 499 500 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 501 502 /* find data/hole in dnode block */ 503 for (; dn.ofs_in_node < end_offset; 504 dn.ofs_in_node++, pgofs++, 505 data_ofs = (loff_t)pgofs << PAGE_SHIFT) { 506 block_t blkaddr; 507 508 blkaddr = f2fs_data_blkaddr(&dn); 509 510 if (__is_valid_data_blkaddr(blkaddr) && 511 !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), 512 blkaddr, DATA_GENERIC_ENHANCE)) { 513 f2fs_put_dnode(&dn); 514 goto fail; 515 } 516 517 if (__found_offset(file->f_mapping, &dn, 518 pgofs, whence)) { 519 f2fs_put_dnode(&dn); 520 goto found; 521 } 522 } 523 f2fs_put_dnode(&dn); 524 } 525 526 if (whence == SEEK_DATA) 527 goto fail; 528 found: 529 if (whence == SEEK_HOLE && data_ofs > isize) 530 data_ofs = isize; 531 inode_unlock_shared(inode); 532 return vfs_setpos(file, data_ofs, maxbytes); 533 fail: 534 inode_unlock_shared(inode); 535 return -ENXIO; 536 } 537 538 static loff_t f2fs_llseek(struct file *file, loff_t 
offset, int whence) 539 { 540 struct inode *inode = file->f_mapping->host; 541 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 542 543 switch (whence) { 544 case SEEK_SET: 545 case SEEK_CUR: 546 case SEEK_END: 547 return generic_file_llseek_size(file, offset, whence, 548 maxbytes, i_size_read(inode)); 549 case SEEK_DATA: 550 case SEEK_HOLE: 551 if (offset < 0) 552 return -ENXIO; 553 return f2fs_seek_block(file, offset, whence); 554 } 555 556 return -EINVAL; 557 } 558 559 static int f2fs_file_mmap_prepare(struct vm_area_desc *desc) 560 { 561 struct file *file = desc->file; 562 struct inode *inode = file_inode(file); 563 564 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 565 return -EIO; 566 567 if (!f2fs_is_compress_backend_ready(inode)) 568 return -EOPNOTSUPP; 569 570 file_accessed(file); 571 desc->vm_ops = &f2fs_file_vm_ops; 572 573 f2fs_down_read(&F2FS_I(inode)->i_sem); 574 set_inode_flag(inode, FI_MMAP_FILE); 575 f2fs_up_read(&F2FS_I(inode)->i_sem); 576 577 return 0; 578 } 579 580 static int finish_preallocate_blocks(struct inode *inode) 581 { 582 int ret = 0; 583 bool opened; 584 585 f2fs_down_read(&F2FS_I(inode)->i_sem); 586 opened = is_inode_flag_set(inode, FI_OPENED_FILE); 587 f2fs_up_read(&F2FS_I(inode)->i_sem); 588 if (opened) 589 return 0; 590 591 inode_lock(inode); 592 if (is_inode_flag_set(inode, FI_OPENED_FILE)) 593 goto out_unlock; 594 595 if (!file_should_truncate(inode)) 596 goto out_update; 597 598 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 599 filemap_invalidate_lock(inode->i_mapping); 600 601 truncate_setsize(inode, i_size_read(inode)); 602 ret = f2fs_truncate(inode); 603 604 filemap_invalidate_unlock(inode->i_mapping); 605 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 606 if (ret) 607 goto out_unlock; 608 609 file_dont_truncate(inode); 610 out_update: 611 f2fs_down_write(&F2FS_I(inode)->i_sem); 612 set_inode_flag(inode, FI_OPENED_FILE); 613 f2fs_up_write(&F2FS_I(inode)->i_sem); 614 out_unlock: 615 inode_unlock(inode); 
616 return ret; 617 } 618 619 static int f2fs_file_open(struct inode *inode, struct file *filp) 620 { 621 int err = fscrypt_file_open(inode, filp); 622 623 if (err) 624 return err; 625 626 if (!f2fs_is_compress_backend_ready(inode)) 627 return -EOPNOTSUPP; 628 629 if (mapping_large_folio_support(inode->i_mapping) && 630 filp->f_mode & FMODE_WRITE) 631 return -EOPNOTSUPP; 632 633 err = fsverity_file_open(inode, filp); 634 if (err) 635 return err; 636 637 filp->f_mode |= FMODE_NOWAIT; 638 filp->f_mode |= FMODE_CAN_ODIRECT; 639 640 err = dquot_file_open(inode, filp); 641 if (err) 642 return err; 643 644 err = finish_preallocate_blocks(inode); 645 if (!err) 646 atomic_inc(&F2FS_I(inode)->open_count); 647 return err; 648 } 649 650 void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) 651 { 652 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 653 int nr_free = 0, ofs = dn->ofs_in_node, len = count; 654 __le32 *addr; 655 bool compressed_cluster = false; 656 int cluster_index = 0, valid_blocks = 0; 657 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 658 bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks); 659 block_t blkstart; 660 int blklen = 0; 661 662 addr = get_dnode_addr(dn->inode, dn->node_folio) + ofs; 663 blkstart = le32_to_cpu(*addr); 664 665 /* Assumption: truncation starts with cluster */ 666 for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { 667 block_t blkaddr = le32_to_cpu(*addr); 668 669 if (f2fs_compressed_file(dn->inode) && 670 !(cluster_index & (cluster_size - 1))) { 671 if (compressed_cluster) 672 f2fs_i_compr_blocks_update(dn->inode, 673 valid_blocks, false); 674 compressed_cluster = (blkaddr == COMPRESS_ADDR); 675 valid_blocks = 0; 676 } 677 678 if (blkaddr == NULL_ADDR) 679 goto next; 680 681 f2fs_set_data_blkaddr(dn, NULL_ADDR); 682 683 if (__is_valid_data_blkaddr(blkaddr)) { 684 if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE)) 685 goto next; 686 if (!f2fs_is_valid_blkaddr_raw(sbi, 
blkaddr, 687 DATA_GENERIC_ENHANCE)) 688 goto next; 689 if (compressed_cluster) 690 valid_blocks++; 691 } 692 693 if (blkstart + blklen == blkaddr) { 694 blklen++; 695 } else { 696 f2fs_invalidate_blocks(sbi, blkstart, blklen); 697 blkstart = blkaddr; 698 blklen = 1; 699 } 700 701 if (!released || blkaddr != COMPRESS_ADDR) 702 nr_free++; 703 704 continue; 705 706 next: 707 if (blklen) 708 f2fs_invalidate_blocks(sbi, blkstart, blklen); 709 710 blkstart = le32_to_cpu(*(addr + 1)); 711 blklen = 0; 712 } 713 714 if (blklen) 715 f2fs_invalidate_blocks(sbi, blkstart, blklen); 716 717 if (compressed_cluster) 718 f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false); 719 720 if (nr_free) { 721 pgoff_t fofs; 722 /* 723 * once we invalidate valid blkaddr in range [ofs, ofs + count], 724 * we will invalidate all blkaddr in the whole range. 725 */ 726 fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), 727 dn->inode) + ofs; 728 f2fs_update_read_extent_cache_range(dn, fofs, 0, len); 729 f2fs_update_age_extent_cache_range(dn, fofs, len); 730 dec_valid_block_count(sbi, dn->inode, nr_free); 731 } 732 dn->ofs_in_node = ofs; 733 734 f2fs_update_time(sbi, REQ_TIME); 735 trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, 736 dn->ofs_in_node, nr_free); 737 } 738 739 static int truncate_partial_data_page(struct inode *inode, u64 from, 740 bool cache_only) 741 { 742 loff_t offset = from & (PAGE_SIZE - 1); 743 pgoff_t index = from >> PAGE_SHIFT; 744 struct address_space *mapping = inode->i_mapping; 745 struct folio *folio; 746 747 if (!offset && !cache_only) 748 return 0; 749 750 if (cache_only) { 751 folio = filemap_lock_folio(mapping, index); 752 if (IS_ERR(folio)) 753 return 0; 754 if (folio_test_uptodate(folio)) 755 goto truncate_out; 756 f2fs_folio_put(folio, true); 757 return 0; 758 } 759 760 folio = f2fs_get_lock_data_folio(inode, index, true); 761 if (IS_ERR(folio)) 762 return PTR_ERR(folio) == -ENOENT ? 
0 : PTR_ERR(folio); 763 truncate_out: 764 f2fs_folio_wait_writeback(folio, DATA, true, true); 765 folio_zero_segment(folio, offset, folio_size(folio)); 766 767 /* An encrypted inode should have a key and truncate the last page. */ 768 f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode)); 769 if (!cache_only) 770 folio_mark_dirty(folio); 771 f2fs_folio_put(folio, true); 772 return 0; 773 } 774 775 int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) 776 { 777 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 778 struct dnode_of_data dn; 779 struct f2fs_lock_context lc; 780 pgoff_t free_from; 781 int count = 0, err = 0; 782 struct folio *ifolio; 783 bool truncate_page = false; 784 785 trace_f2fs_truncate_blocks_enter(inode, from); 786 787 if (IS_DEVICE_ALIASING(inode) && from) { 788 err = -EINVAL; 789 goto out_err; 790 } 791 792 free_from = (pgoff_t)F2FS_BLK_ALIGN(from); 793 794 if (free_from >= max_file_blocks(inode)) 795 goto free_partial; 796 797 if (lock) 798 f2fs_lock_op(sbi, &lc); 799 800 ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); 801 if (IS_ERR(ifolio)) { 802 err = PTR_ERR(ifolio); 803 goto out; 804 } 805 806 if (IS_DEVICE_ALIASING(inode)) { 807 struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; 808 struct extent_info ei = et->largest; 809 810 f2fs_invalidate_blocks(sbi, ei.blk, ei.len); 811 812 dec_valid_block_count(sbi, inode, ei.len); 813 f2fs_update_time(sbi, REQ_TIME); 814 815 f2fs_folio_put(ifolio, true); 816 goto out; 817 } 818 819 if (f2fs_has_inline_data(inode)) { 820 f2fs_truncate_inline_inode(inode, ifolio, from); 821 f2fs_folio_put(ifolio, true); 822 truncate_page = true; 823 goto out; 824 } 825 826 set_new_dnode(&dn, inode, ifolio, NULL, 0); 827 err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); 828 if (err) { 829 if (err == -ENOENT) 830 goto free_next; 831 goto out; 832 } 833 834 count = ADDRS_PER_PAGE(dn.node_folio, inode); 835 836 count -= dn.ofs_in_node; 837 f2fs_bug_on(sbi, count < 0); 838 
839 if (dn.ofs_in_node || IS_INODE(dn.node_folio)) { 840 f2fs_truncate_data_blocks_range(&dn, count); 841 free_from += count; 842 } 843 844 f2fs_put_dnode(&dn); 845 free_next: 846 err = f2fs_truncate_inode_blocks(inode, free_from); 847 out: 848 if (lock) 849 f2fs_unlock_op(sbi, &lc); 850 free_partial: 851 /* lastly zero out the first data page */ 852 if (!err) 853 err = truncate_partial_data_page(inode, from, truncate_page); 854 out_err: 855 trace_f2fs_truncate_blocks_exit(inode, err); 856 return err; 857 } 858 859 int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) 860 { 861 u64 free_from = from; 862 int err; 863 864 #ifdef CONFIG_F2FS_FS_COMPRESSION 865 /* 866 * for compressed file, only support cluster size 867 * aligned truncation. 868 */ 869 if (f2fs_compressed_file(inode)) 870 free_from = round_up(from, 871 F2FS_I(inode)->i_cluster_size << PAGE_SHIFT); 872 #endif 873 874 err = f2fs_do_truncate_blocks(inode, free_from, lock); 875 if (err) 876 return err; 877 878 #ifdef CONFIG_F2FS_FS_COMPRESSION 879 /* 880 * For compressed file, after release compress blocks, don't allow write 881 * direct, but we should allow write direct after truncate to zero. 
882 */ 883 if (f2fs_compressed_file(inode) && !free_from 884 && is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) 885 clear_inode_flag(inode, FI_COMPRESS_RELEASED); 886 887 if (from != free_from) { 888 err = f2fs_truncate_partial_cluster(inode, from, lock); 889 if (err) 890 return err; 891 } 892 #endif 893 894 return 0; 895 } 896 897 int f2fs_truncate(struct inode *inode) 898 { 899 int err; 900 901 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 902 return -EIO; 903 904 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 905 S_ISLNK(inode->i_mode))) 906 return 0; 907 908 trace_f2fs_truncate(inode); 909 910 if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) 911 return -EIO; 912 913 err = f2fs_dquot_initialize(inode); 914 if (err) 915 return err; 916 917 /* we should check inline_data size */ 918 if (!f2fs_may_inline_data(inode)) { 919 err = f2fs_convert_inline_inode(inode); 920 if (err) { 921 /* 922 * Always truncate page #0 to avoid page cache 923 * leak in evict() path. 924 */ 925 truncate_inode_pages_range(inode->i_mapping, 926 F2FS_BLK_TO_BYTES(0), 927 F2FS_BLK_END_BYTES(0)); 928 return err; 929 } 930 } 931 932 err = f2fs_truncate_blocks(inode, i_size_read(inode), true); 933 if (err) 934 return err; 935 936 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 937 f2fs_mark_inode_dirty_sync(inode, false); 938 return 0; 939 } 940 941 static bool f2fs_force_buffered_io(struct inode *inode, int rw) 942 { 943 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 944 945 if (!fscrypt_dio_supported(inode)) 946 return true; 947 if (fsverity_active(inode)) 948 return true; 949 if (f2fs_compressed_file(inode)) 950 return true; 951 /* 952 * only force direct read to use buffered IO, for direct write, 953 * it expects inline data conversion before committing IO. 
954 */ 955 if (f2fs_has_inline_data(inode) && rw == READ) 956 return true; 957 958 /* disallow direct IO if any of devices has unaligned blksize */ 959 if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) 960 return true; 961 /* 962 * for blkzoned device, fallback direct IO to buffered IO, so 963 * all IOs can be serialized by log-structured write. 964 */ 965 if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) && 966 !f2fs_is_pinned_file(inode)) 967 return true; 968 if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) 969 return true; 970 971 return false; 972 } 973 974 int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path, 975 struct kstat *stat, u32 request_mask, unsigned int query_flags) 976 { 977 struct inode *inode = d_inode(path->dentry); 978 struct f2fs_inode_info *fi = F2FS_I(inode); 979 struct f2fs_inode *ri = NULL; 980 unsigned int flags; 981 982 if (f2fs_has_extra_attr(inode) && 983 f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && 984 F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { 985 stat->result_mask |= STATX_BTIME; 986 stat->btime.tv_sec = fi->i_crtime.tv_sec; 987 stat->btime.tv_nsec = fi->i_crtime.tv_nsec; 988 } 989 990 /* 991 * Return the DIO alignment restrictions if requested. We only return 992 * this information when requested, since on encrypted files it might 993 * take a fair bit of work to get if the file wasn't opened recently. 994 * 995 * f2fs sometimes supports DIO reads but not DIO writes. STATX_DIOALIGN 996 * cannot represent that, so in that case we report no DIO support. 
997 */ 998 if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) { 999 unsigned int bsize = i_blocksize(inode); 1000 1001 stat->result_mask |= STATX_DIOALIGN; 1002 if (!f2fs_force_buffered_io(inode, WRITE)) { 1003 stat->dio_mem_align = bsize; 1004 stat->dio_offset_align = bsize; 1005 } 1006 } 1007 1008 flags = fi->i_flags; 1009 if (flags & F2FS_COMPR_FL) 1010 stat->attributes |= STATX_ATTR_COMPRESSED; 1011 if (flags & F2FS_APPEND_FL) 1012 stat->attributes |= STATX_ATTR_APPEND; 1013 if (IS_ENCRYPTED(inode)) 1014 stat->attributes |= STATX_ATTR_ENCRYPTED; 1015 if (flags & F2FS_IMMUTABLE_FL) 1016 stat->attributes |= STATX_ATTR_IMMUTABLE; 1017 if (flags & F2FS_NODUMP_FL) 1018 stat->attributes |= STATX_ATTR_NODUMP; 1019 if (IS_VERITY(inode)) 1020 stat->attributes |= STATX_ATTR_VERITY; 1021 1022 stat->attributes_mask |= (STATX_ATTR_COMPRESSED | 1023 STATX_ATTR_APPEND | 1024 STATX_ATTR_ENCRYPTED | 1025 STATX_ATTR_IMMUTABLE | 1026 STATX_ATTR_NODUMP | 1027 STATX_ATTR_VERITY); 1028 1029 generic_fillattr(idmap, request_mask, inode, stat); 1030 1031 /* we need to show initial sectors used for inline_data/dentries */ 1032 if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) || 1033 f2fs_has_inline_dentry(inode)) 1034 stat->blocks += (stat->size + 511) >> 9; 1035 1036 return 0; 1037 } 1038 1039 #ifdef CONFIG_F2FS_FS_POSIX_ACL 1040 static void __setattr_copy(struct mnt_idmap *idmap, 1041 struct inode *inode, const struct iattr *attr) 1042 { 1043 unsigned int ia_valid = attr->ia_valid; 1044 1045 i_uid_update(idmap, attr, inode); 1046 i_gid_update(idmap, attr, inode); 1047 if (ia_valid & ATTR_ATIME) 1048 inode_set_atime_to_ts(inode, attr->ia_atime); 1049 if (ia_valid & ATTR_MTIME) 1050 inode_set_mtime_to_ts(inode, attr->ia_mtime); 1051 if (ia_valid & ATTR_CTIME) 1052 inode_set_ctime_to_ts(inode, attr->ia_ctime); 1053 if (ia_valid & ATTR_MODE) { 1054 umode_t mode = attr->ia_mode; 1055 1056 if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) 1057 
mode &= ~S_ISGID; 1058 set_acl_inode(inode, mode); 1059 } 1060 } 1061 #else 1062 #define __setattr_copy setattr_copy 1063 #endif 1064 1065 int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 1066 struct iattr *attr) 1067 { 1068 struct inode *inode = d_inode(dentry); 1069 struct f2fs_inode_info *fi = F2FS_I(inode); 1070 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1071 int err; 1072 1073 if (unlikely(f2fs_cp_error(sbi))) 1074 return -EIO; 1075 1076 err = setattr_prepare(idmap, dentry, attr); 1077 if (err) 1078 return err; 1079 1080 err = fscrypt_prepare_setattr(dentry, attr); 1081 if (err) 1082 return err; 1083 1084 if (unlikely(IS_IMMUTABLE(inode))) 1085 return -EPERM; 1086 1087 if (unlikely(IS_APPEND(inode) && 1088 (attr->ia_valid & (ATTR_MODE | ATTR_UID | 1089 ATTR_GID | ATTR_TIMES_SET)))) 1090 return -EPERM; 1091 1092 if ((attr->ia_valid & ATTR_SIZE)) { 1093 if (!f2fs_is_compress_backend_ready(inode) || 1094 IS_DEVICE_ALIASING(inode)) 1095 return -EOPNOTSUPP; 1096 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && 1097 !IS_ALIGNED(attr->ia_size, 1098 F2FS_BLK_TO_BYTES(fi->i_cluster_size))) 1099 return -EINVAL; 1100 /* 1101 * To prevent scattered pin block generation, we don't allow 1102 * smaller/equal size unaligned truncation for pinned file. 1103 * We only support overwrite IO to pinned file, so don't 1104 * care about larger size truncation. 
1105 */ 1106 if (f2fs_is_pinned_file(inode) && 1107 attr->ia_size <= i_size_read(inode) && 1108 !IS_ALIGNED(attr->ia_size, 1109 F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi)))) 1110 return -EINVAL; 1111 } 1112 1113 if (is_quota_modification(idmap, inode, attr)) { 1114 err = f2fs_dquot_initialize(inode); 1115 if (err) 1116 return err; 1117 } 1118 if (i_uid_needs_update(idmap, attr, inode) || 1119 i_gid_needs_update(idmap, attr, inode)) { 1120 struct f2fs_lock_context lc; 1121 1122 f2fs_lock_op(sbi, &lc); 1123 err = dquot_transfer(idmap, inode, attr); 1124 if (err) { 1125 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); 1126 f2fs_unlock_op(sbi, &lc); 1127 return err; 1128 } 1129 /* 1130 * update uid/gid under lock_op(), so that dquot and inode can 1131 * be updated atomically. 1132 */ 1133 i_uid_update(idmap, attr, inode); 1134 i_gid_update(idmap, attr, inode); 1135 f2fs_mark_inode_dirty_sync(inode, true); 1136 f2fs_unlock_op(sbi, &lc); 1137 } 1138 1139 if (attr->ia_valid & ATTR_SIZE) { 1140 loff_t old_size = i_size_read(inode); 1141 1142 if (attr->ia_size > MAX_INLINE_DATA(inode)) { 1143 /* 1144 * should convert inline inode before i_size_write to 1145 * keep smaller than inline_data size with inline flag. 1146 */ 1147 err = f2fs_convert_inline_inode(inode); 1148 if (err) 1149 return err; 1150 } 1151 1152 /* 1153 * wait for inflight dio, blocks should be removed after 1154 * IO completion. 1155 */ 1156 if (attr->ia_size < old_size) 1157 inode_dio_wait(inode); 1158 1159 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 1160 filemap_invalidate_lock(inode->i_mapping); 1161 1162 if (attr->ia_size > old_size) 1163 f2fs_zero_post_eof_page(inode, attr->ia_size, false); 1164 truncate_setsize(inode, attr->ia_size); 1165 1166 if (attr->ia_size <= old_size) 1167 err = f2fs_truncate(inode); 1168 /* 1169 * do not trim all blocks after i_size if target size is 1170 * larger than i_size. 
1171 */ 1172 filemap_invalidate_unlock(inode->i_mapping); 1173 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 1174 if (err) 1175 return err; 1176 1177 spin_lock(&fi->i_size_lock); 1178 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 1179 fi->last_disk_size = i_size_read(inode); 1180 spin_unlock(&fi->i_size_lock); 1181 } 1182 1183 __setattr_copy(idmap, inode, attr); 1184 1185 if (attr->ia_valid & ATTR_MODE) { 1186 err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode)); 1187 1188 if (is_inode_flag_set(inode, FI_ACL_MODE)) { 1189 if (!err) 1190 inode->i_mode = fi->i_acl_mode; 1191 clear_inode_flag(inode, FI_ACL_MODE); 1192 } 1193 } 1194 1195 /* file size may changed here */ 1196 f2fs_mark_inode_dirty_sync(inode, true); 1197 1198 /* inode change will produce dirty node pages flushed by checkpoint */ 1199 f2fs_balance_fs(sbi, true); 1200 1201 return err; 1202 } 1203 1204 const struct inode_operations f2fs_file_inode_operations = { 1205 .getattr = f2fs_getattr, 1206 .setattr = f2fs_setattr, 1207 .get_inode_acl = f2fs_get_acl, 1208 .set_acl = f2fs_set_acl, 1209 .listxattr = f2fs_listxattr, 1210 .fiemap = f2fs_fiemap, 1211 .fileattr_get = f2fs_fileattr_get, 1212 .fileattr_set = f2fs_fileattr_set, 1213 }; 1214 1215 static int fill_zero(struct inode *inode, pgoff_t index, 1216 loff_t start, loff_t len) 1217 { 1218 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1219 struct folio *folio; 1220 struct f2fs_lock_context lc; 1221 1222 if (!len) 1223 return 0; 1224 1225 f2fs_balance_fs(sbi, true); 1226 1227 f2fs_lock_op(sbi, &lc); 1228 folio = f2fs_get_new_data_folio(inode, NULL, index, false); 1229 f2fs_unlock_op(sbi, &lc); 1230 1231 if (IS_ERR(folio)) 1232 return PTR_ERR(folio); 1233 1234 f2fs_folio_wait_writeback(folio, DATA, true, true); 1235 folio_zero_range(folio, start, len); 1236 folio_mark_dirty(folio); 1237 f2fs_folio_put(folio, true); 1238 return 0; 1239 } 1240 1241 int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) 1242 { 1243 
int err; 1244 1245 while (pg_start < pg_end) { 1246 struct dnode_of_data dn; 1247 pgoff_t end_offset, count; 1248 1249 set_new_dnode(&dn, inode, NULL, NULL, 0); 1250 err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); 1251 if (err) { 1252 if (err == -ENOENT) { 1253 pg_start = f2fs_get_next_page_offset(&dn, 1254 pg_start); 1255 continue; 1256 } 1257 return err; 1258 } 1259 1260 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 1261 count = min(end_offset - dn.ofs_in_node, pg_end - pg_start); 1262 1263 f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); 1264 1265 f2fs_truncate_data_blocks_range(&dn, count); 1266 f2fs_put_dnode(&dn); 1267 1268 pg_start += count; 1269 } 1270 return 0; 1271 } 1272 1273 static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len) 1274 { 1275 pgoff_t pg_start, pg_end; 1276 loff_t off_start, off_end; 1277 int ret; 1278 1279 ret = f2fs_convert_inline_inode(inode); 1280 if (ret) 1281 return ret; 1282 1283 f2fs_zero_post_eof_page(inode, offset + len, true); 1284 1285 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; 1286 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; 1287 1288 off_start = offset & (PAGE_SIZE - 1); 1289 off_end = (offset + len) & (PAGE_SIZE - 1); 1290 1291 if (pg_start == pg_end) { 1292 ret = fill_zero(inode, pg_start, off_start, 1293 off_end - off_start); 1294 if (ret) 1295 return ret; 1296 } else { 1297 if (off_start) { 1298 ret = fill_zero(inode, pg_start++, off_start, 1299 PAGE_SIZE - off_start); 1300 if (ret) 1301 return ret; 1302 } 1303 if (off_end) { 1304 ret = fill_zero(inode, pg_end, 0, off_end); 1305 if (ret) 1306 return ret; 1307 } 1308 1309 if (pg_start < pg_end) { 1310 loff_t blk_start, blk_end; 1311 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1312 struct f2fs_lock_context lc; 1313 1314 f2fs_balance_fs(sbi, true); 1315 1316 blk_start = (loff_t)pg_start << PAGE_SHIFT; 1317 blk_end = (loff_t)pg_end << PAGE_SHIFT; 1318 1319 
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1320 filemap_invalidate_lock(inode->i_mapping); 1321 1322 truncate_pagecache_range(inode, blk_start, blk_end - 1); 1323 1324 f2fs_lock_op(sbi, &lc); 1325 ret = f2fs_truncate_hole(inode, pg_start, pg_end); 1326 f2fs_unlock_op(sbi, &lc); 1327 1328 filemap_invalidate_unlock(inode->i_mapping); 1329 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1330 } 1331 } 1332 1333 return ret; 1334 } 1335 1336 static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, 1337 int *do_replace, pgoff_t off, pgoff_t len) 1338 { 1339 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1340 struct dnode_of_data dn; 1341 int ret, done, i; 1342 1343 next_dnode: 1344 set_new_dnode(&dn, inode, NULL, NULL, 0); 1345 ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); 1346 if (ret && ret != -ENOENT) { 1347 return ret; 1348 } else if (ret == -ENOENT) { 1349 if (dn.max_level == 0) 1350 return -ENOENT; 1351 done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - 1352 dn.ofs_in_node, len); 1353 blkaddr += done; 1354 do_replace += done; 1355 goto next; 1356 } 1357 1358 done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) - 1359 dn.ofs_in_node, len); 1360 for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { 1361 *blkaddr = f2fs_data_blkaddr(&dn); 1362 1363 if (__is_valid_data_blkaddr(*blkaddr) && 1364 !f2fs_is_valid_blkaddr(sbi, *blkaddr, 1365 DATA_GENERIC_ENHANCE)) { 1366 f2fs_put_dnode(&dn); 1367 return -EFSCORRUPTED; 1368 } 1369 1370 if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { 1371 1372 if (f2fs_lfs_mode(sbi)) { 1373 f2fs_put_dnode(&dn); 1374 return -EOPNOTSUPP; 1375 } 1376 1377 /* do not invalidate this block address */ 1378 f2fs_update_data_blkaddr(&dn, NULL_ADDR); 1379 *do_replace = 1; 1380 } 1381 } 1382 f2fs_put_dnode(&dn); 1383 next: 1384 len -= done; 1385 off += done; 1386 if (len) 1387 goto next_dnode; 1388 return 0; 1389 } 1390 1391 static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, 1392 int 
*do_replace, pgoff_t off, int len) 1393 { 1394 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1395 struct dnode_of_data dn; 1396 int ret, i; 1397 1398 for (i = 0; i < len; i++, do_replace++, blkaddr++) { 1399 if (*do_replace == 0) 1400 continue; 1401 1402 set_new_dnode(&dn, inode, NULL, NULL, 0); 1403 ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); 1404 if (ret) { 1405 dec_valid_block_count(sbi, inode, 1); 1406 f2fs_invalidate_blocks(sbi, *blkaddr, 1); 1407 } else { 1408 f2fs_update_data_blkaddr(&dn, *blkaddr); 1409 } 1410 f2fs_put_dnode(&dn); 1411 } 1412 return 0; 1413 } 1414 1415 static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, 1416 block_t *blkaddr, int *do_replace, 1417 pgoff_t src, pgoff_t dst, pgoff_t len, bool full) 1418 { 1419 struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode); 1420 pgoff_t i = 0; 1421 int ret; 1422 1423 while (i < len) { 1424 if (blkaddr[i] == NULL_ADDR && !full) { 1425 i++; 1426 continue; 1427 } 1428 1429 if (do_replace[i] || blkaddr[i] == NULL_ADDR) { 1430 struct dnode_of_data dn; 1431 struct node_info ni; 1432 size_t new_size; 1433 pgoff_t ilen; 1434 1435 set_new_dnode(&dn, dst_inode, NULL, NULL, 0); 1436 ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE); 1437 if (ret) 1438 return ret; 1439 1440 ret = f2fs_get_node_info(sbi, dn.nid, &ni, false); 1441 if (ret) { 1442 f2fs_put_dnode(&dn); 1443 return ret; 1444 } 1445 1446 ilen = min((pgoff_t) 1447 ADDRS_PER_PAGE(dn.node_folio, dst_inode) - 1448 dn.ofs_in_node, len - i); 1449 do { 1450 dn.data_blkaddr = f2fs_data_blkaddr(&dn); 1451 f2fs_truncate_data_blocks_range(&dn, 1); 1452 1453 if (do_replace[i]) { 1454 f2fs_i_blocks_write(src_inode, 1455 1, false, false); 1456 f2fs_i_blocks_write(dst_inode, 1457 1, true, false); 1458 f2fs_replace_block(sbi, &dn, dn.data_blkaddr, 1459 blkaddr[i], ni.version, true, false); 1460 1461 do_replace[i] = 0; 1462 } 1463 dn.ofs_in_node++; 1464 i++; 1465 new_size = (loff_t)(dst + i) << PAGE_SHIFT; 1466 if 
(dst_inode->i_size < new_size) 1467 f2fs_i_size_write(dst_inode, new_size); 1468 } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR)); 1469 1470 f2fs_put_dnode(&dn); 1471 } else { 1472 struct folio *fsrc, *fdst; 1473 1474 fsrc = f2fs_get_lock_data_folio(src_inode, 1475 src + i, true); 1476 if (IS_ERR(fsrc)) 1477 return PTR_ERR(fsrc); 1478 fdst = f2fs_get_new_data_folio(dst_inode, NULL, dst + i, 1479 true); 1480 if (IS_ERR(fdst)) { 1481 f2fs_folio_put(fsrc, true); 1482 return PTR_ERR(fdst); 1483 } 1484 1485 f2fs_folio_wait_writeback(fdst, DATA, true, true); 1486 1487 memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE); 1488 folio_mark_dirty(fdst); 1489 folio_set_f2fs_gcing(fdst); 1490 f2fs_folio_put(fdst, true); 1491 f2fs_folio_put(fsrc, true); 1492 1493 ret = f2fs_truncate_hole(src_inode, 1494 src + i, src + i + 1); 1495 if (ret) 1496 return ret; 1497 i++; 1498 } 1499 } 1500 return 0; 1501 } 1502 1503 static int __exchange_data_block(struct inode *src_inode, 1504 struct inode *dst_inode, pgoff_t src, pgoff_t dst, 1505 pgoff_t len, bool full) 1506 { 1507 block_t *src_blkaddr; 1508 int *do_replace; 1509 pgoff_t olen; 1510 int ret; 1511 1512 while (len) { 1513 olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len); 1514 1515 src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), 1516 array_size(olen, sizeof(block_t)), 1517 GFP_NOFS); 1518 if (!src_blkaddr) 1519 return -ENOMEM; 1520 1521 do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), 1522 array_size(olen, sizeof(int)), 1523 GFP_NOFS); 1524 if (!do_replace) { 1525 kvfree(src_blkaddr); 1526 return -ENOMEM; 1527 } 1528 1529 ret = __read_out_blkaddrs(src_inode, src_blkaddr, 1530 do_replace, src, olen); 1531 if (ret) 1532 goto roll_back; 1533 1534 ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr, 1535 do_replace, src, dst, olen, full); 1536 if (ret) 1537 goto roll_back; 1538 1539 src += olen; 1540 dst += olen; 1541 len -= olen; 1542 1543 kvfree(src_blkaddr); 1544 kvfree(do_replace); 1545 } 1546 return 0; 1547 1548 
roll_back: 1549 __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen); 1550 kvfree(src_blkaddr); 1551 kvfree(do_replace); 1552 return ret; 1553 } 1554 1555 static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) 1556 { 1557 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1558 struct f2fs_lock_context lc; 1559 pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 1560 pgoff_t start = offset >> PAGE_SHIFT; 1561 pgoff_t end = (offset + len) >> PAGE_SHIFT; 1562 int ret; 1563 1564 f2fs_balance_fs(sbi, true); 1565 1566 /* avoid gc operation during block exchange */ 1567 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1568 filemap_invalidate_lock(inode->i_mapping); 1569 1570 f2fs_zero_post_eof_page(inode, offset + len, false); 1571 1572 f2fs_lock_op(sbi, &lc); 1573 f2fs_drop_extent_tree(inode); 1574 truncate_pagecache(inode, offset); 1575 ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); 1576 f2fs_unlock_op(sbi, &lc); 1577 1578 filemap_invalidate_unlock(inode->i_mapping); 1579 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1580 return ret; 1581 } 1582 1583 static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) 1584 { 1585 loff_t new_size; 1586 int ret; 1587 1588 if (offset + len >= i_size_read(inode)) 1589 return -EINVAL; 1590 1591 /* collapse range should be aligned to block size of f2fs. 
*/ 1592 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) 1593 return -EINVAL; 1594 1595 ret = f2fs_convert_inline_inode(inode); 1596 if (ret) 1597 return ret; 1598 1599 /* write out all dirty pages from offset */ 1600 ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); 1601 if (ret) 1602 return ret; 1603 1604 ret = f2fs_do_collapse(inode, offset, len); 1605 if (ret) 1606 return ret; 1607 1608 /* write out all moved pages, if possible */ 1609 filemap_invalidate_lock(inode->i_mapping); 1610 filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); 1611 truncate_pagecache(inode, offset); 1612 1613 new_size = i_size_read(inode) - len; 1614 ret = f2fs_truncate_blocks(inode, new_size, true); 1615 filemap_invalidate_unlock(inode->i_mapping); 1616 if (!ret) 1617 f2fs_i_size_write(inode, new_size); 1618 return ret; 1619 } 1620 1621 static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, 1622 pgoff_t end) 1623 { 1624 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 1625 pgoff_t index = start; 1626 unsigned int ofs_in_node = dn->ofs_in_node; 1627 blkcnt_t count = 0; 1628 int ret; 1629 1630 for (; index < end; index++, dn->ofs_in_node++) { 1631 if (f2fs_data_blkaddr(dn) == NULL_ADDR) 1632 count++; 1633 } 1634 1635 dn->ofs_in_node = ofs_in_node; 1636 ret = f2fs_reserve_new_blocks(dn, count); 1637 if (ret) 1638 return ret; 1639 1640 dn->ofs_in_node = ofs_in_node; 1641 for (index = start; index < end; index++, dn->ofs_in_node++) { 1642 dn->data_blkaddr = f2fs_data_blkaddr(dn); 1643 /* 1644 * f2fs_reserve_new_blocks will not guarantee entire block 1645 * allocation. 
1646 */ 1647 if (dn->data_blkaddr == NULL_ADDR) { 1648 ret = -ENOSPC; 1649 break; 1650 } 1651 1652 if (dn->data_blkaddr == NEW_ADDR) 1653 continue; 1654 1655 if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr, 1656 DATA_GENERIC_ENHANCE)) { 1657 ret = -EFSCORRUPTED; 1658 break; 1659 } 1660 1661 f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1); 1662 f2fs_set_data_blkaddr(dn, NEW_ADDR); 1663 } 1664 1665 if (index > start) { 1666 f2fs_update_read_extent_cache_range(dn, start, 0, 1667 index - start); 1668 f2fs_update_age_extent_cache_range(dn, start, index - start); 1669 } 1670 1671 return ret; 1672 } 1673 1674 static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, 1675 int mode) 1676 { 1677 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1678 struct address_space *mapping = inode->i_mapping; 1679 pgoff_t index, pg_start, pg_end; 1680 loff_t new_size = i_size_read(inode); 1681 loff_t off_start, off_end; 1682 int ret = 0; 1683 1684 ret = inode_newsize_ok(inode, (len + offset)); 1685 if (ret) 1686 return ret; 1687 1688 ret = f2fs_convert_inline_inode(inode); 1689 if (ret) 1690 return ret; 1691 1692 ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); 1693 if (ret) 1694 return ret; 1695 1696 f2fs_zero_post_eof_page(inode, offset + len, true); 1697 1698 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; 1699 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; 1700 1701 off_start = offset & (PAGE_SIZE - 1); 1702 off_end = (offset + len) & (PAGE_SIZE - 1); 1703 1704 if (pg_start == pg_end) { 1705 ret = fill_zero(inode, pg_start, off_start, 1706 off_end - off_start); 1707 if (ret) 1708 return ret; 1709 1710 new_size = max_t(loff_t, new_size, offset + len); 1711 } else { 1712 if (off_start) { 1713 ret = fill_zero(inode, pg_start++, off_start, 1714 PAGE_SIZE - off_start); 1715 if (ret) 1716 return ret; 1717 1718 new_size = max_t(loff_t, new_size, 1719 (loff_t)pg_start << PAGE_SHIFT); 1720 } 1721 1722 for (index = pg_start; index < 
pg_end;) { 1723 struct dnode_of_data dn; 1724 struct f2fs_lock_context lc; 1725 unsigned int end_offset; 1726 pgoff_t end; 1727 1728 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1729 filemap_invalidate_lock(mapping); 1730 1731 truncate_pagecache_range(inode, 1732 (loff_t)index << PAGE_SHIFT, 1733 ((loff_t)pg_end << PAGE_SHIFT) - 1); 1734 1735 f2fs_lock_op(sbi, &lc); 1736 1737 set_new_dnode(&dn, inode, NULL, NULL, 0); 1738 ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); 1739 if (ret) { 1740 f2fs_unlock_op(sbi, &lc); 1741 filemap_invalidate_unlock(mapping); 1742 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1743 goto out; 1744 } 1745 1746 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 1747 end = min(pg_end, end_offset - dn.ofs_in_node + index); 1748 1749 ret = f2fs_do_zero_range(&dn, index, end); 1750 f2fs_put_dnode(&dn); 1751 1752 f2fs_unlock_op(sbi, &lc); 1753 filemap_invalidate_unlock(mapping); 1754 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1755 1756 f2fs_balance_fs(sbi, dn.node_changed); 1757 1758 if (ret) 1759 goto out; 1760 1761 index = end; 1762 new_size = max_t(loff_t, new_size, 1763 (loff_t)index << PAGE_SHIFT); 1764 } 1765 1766 if (off_end) { 1767 ret = fill_zero(inode, pg_end, 0, off_end); 1768 if (ret) 1769 goto out; 1770 1771 new_size = max_t(loff_t, new_size, offset + len); 1772 } 1773 } 1774 1775 out: 1776 if (new_size > i_size_read(inode)) { 1777 if (mode & FALLOC_FL_KEEP_SIZE) 1778 file_set_keep_isize(inode); 1779 else 1780 f2fs_i_size_write(inode, new_size); 1781 } 1782 return ret; 1783 } 1784 1785 static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) 1786 { 1787 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1788 struct address_space *mapping = inode->i_mapping; 1789 pgoff_t nr, pg_start, pg_end, delta, idx; 1790 loff_t new_size; 1791 int ret = 0; 1792 1793 new_size = i_size_read(inode) + len; 1794 ret = inode_newsize_ok(inode, new_size); 1795 if (ret) 1796 return ret; 1797 1798 if (offset >= 
i_size_read(inode)) 1799 return -EINVAL; 1800 1801 /* insert range should be aligned to block size of f2fs. */ 1802 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) 1803 return -EINVAL; 1804 1805 ret = f2fs_convert_inline_inode(inode); 1806 if (ret) 1807 return ret; 1808 1809 f2fs_balance_fs(sbi, true); 1810 1811 filemap_invalidate_lock(mapping); 1812 ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); 1813 filemap_invalidate_unlock(mapping); 1814 if (ret) 1815 return ret; 1816 1817 /* write out all dirty pages from offset */ 1818 ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); 1819 if (ret) 1820 return ret; 1821 1822 pg_start = offset >> PAGE_SHIFT; 1823 pg_end = (offset + len) >> PAGE_SHIFT; 1824 delta = pg_end - pg_start; 1825 idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 1826 1827 /* avoid gc operation during block exchange */ 1828 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1829 filemap_invalidate_lock(mapping); 1830 1831 f2fs_zero_post_eof_page(inode, offset + len, false); 1832 truncate_pagecache(inode, offset); 1833 1834 while (!ret && idx > pg_start) { 1835 struct f2fs_lock_context lc; 1836 1837 nr = idx - pg_start; 1838 if (nr > delta) 1839 nr = delta; 1840 idx -= nr; 1841 1842 f2fs_lock_op(sbi, &lc); 1843 f2fs_drop_extent_tree(inode); 1844 1845 ret = __exchange_data_block(inode, inode, idx, 1846 idx + delta, nr, false); 1847 f2fs_unlock_op(sbi, &lc); 1848 } 1849 filemap_invalidate_unlock(mapping); 1850 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1851 if (ret) 1852 return ret; 1853 1854 /* write out all moved pages, if possible */ 1855 filemap_invalidate_lock(mapping); 1856 ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); 1857 truncate_pagecache(inode, offset); 1858 filemap_invalidate_unlock(mapping); 1859 1860 if (!ret) 1861 f2fs_i_size_write(inode, new_size); 1862 return ret; 1863 } 1864 1865 static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, 1866 loff_t len, int 
mode) 1867 { 1868 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1869 struct f2fs_map_blocks map = { .m_next_pgofs = NULL, 1870 .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, 1871 .m_may_create = true }; 1872 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, 1873 .init_gc_type = FG_GC, 1874 .should_migrate_blocks = false, 1875 .err_gc_skipped = true, 1876 .nr_free_secs = 0 }; 1877 pgoff_t pg_start, pg_end; 1878 loff_t new_size; 1879 loff_t off_end; 1880 block_t expanded = 0; 1881 int err; 1882 1883 err = inode_newsize_ok(inode, (len + offset)); 1884 if (err) 1885 return err; 1886 1887 err = f2fs_convert_inline_inode(inode); 1888 if (err) 1889 return err; 1890 1891 f2fs_zero_post_eof_page(inode, offset + len, true); 1892 1893 f2fs_balance_fs(sbi, true); 1894 1895 pg_start = ((unsigned long long)offset) >> PAGE_SHIFT; 1896 pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT; 1897 off_end = (offset + len) & (PAGE_SIZE - 1); 1898 1899 map.m_lblk = pg_start; 1900 map.m_len = pg_end - pg_start; 1901 if (off_end) 1902 map.m_len++; 1903 1904 if (!map.m_len) 1905 return 0; 1906 1907 if (f2fs_is_pinned_file(inode)) { 1908 block_t sec_blks = CAP_BLKS_PER_SEC(sbi); 1909 block_t sec_len = roundup(map.m_len, sec_blks); 1910 1911 map.m_len = sec_blks; 1912 next_alloc: 1913 f2fs_down_write(&sbi->pin_sem); 1914 1915 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 1916 if (has_not_enough_free_secs(sbi, 0, 0)) { 1917 f2fs_up_write(&sbi->pin_sem); 1918 err = -ENOSPC; 1919 f2fs_warn_ratelimited(sbi, 1920 "ino:%llu, start:%lu, end:%lu, need to trigger GC to " 1921 "reclaim enough free segment when checkpoint is enabled", 1922 inode->i_ino, pg_start, pg_end); 1923 goto out_err; 1924 } 1925 } 1926 1927 if (has_not_enough_free_secs(sbi, 0, 1928 sbi->reserved_pin_section)) { 1929 f2fs_down_write_trace(&sbi->gc_lock, &gc_control.lc); 1930 stat_inc_gc_call_count(sbi, FOREGROUND); 1931 err = f2fs_gc(sbi, &gc_control); 1932 if (err && err != -ENODATA) { 1933 
f2fs_up_write(&sbi->pin_sem); 1934 goto out_err; 1935 } 1936 } 1937 1938 err = f2fs_allocate_pinning_section(sbi); 1939 if (err) { 1940 f2fs_up_write(&sbi->pin_sem); 1941 goto out_err; 1942 } 1943 1944 map.m_seg_type = CURSEG_COLD_DATA_PINNED; 1945 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); 1946 file_dont_truncate(inode); 1947 1948 f2fs_up_write(&sbi->pin_sem); 1949 1950 expanded += map.m_len; 1951 sec_len -= map.m_len; 1952 map.m_lblk += map.m_len; 1953 if (!err && sec_len) 1954 goto next_alloc; 1955 1956 map.m_len = expanded; 1957 } else { 1958 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO); 1959 expanded = map.m_len; 1960 } 1961 out_err: 1962 if (err) { 1963 pgoff_t last_off; 1964 1965 if (!expanded) 1966 return err; 1967 1968 last_off = pg_start + expanded - 1; 1969 1970 /* update new size to the failed position */ 1971 new_size = (last_off == pg_end) ? offset + len : 1972 (loff_t)(last_off + 1) << PAGE_SHIFT; 1973 } else { 1974 new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; 1975 } 1976 1977 if (new_size > i_size_read(inode)) { 1978 if (mode & FALLOC_FL_KEEP_SIZE) 1979 file_set_keep_isize(inode); 1980 else 1981 f2fs_i_size_write(inode, new_size); 1982 } 1983 1984 return err; 1985 } 1986 1987 static long f2fs_fallocate(struct file *file, int mode, 1988 loff_t offset, loff_t len) 1989 { 1990 struct inode *inode = file_inode(file); 1991 long ret = 0; 1992 1993 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 1994 return -EIO; 1995 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) 1996 return -ENOSPC; 1997 if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) 1998 return -EOPNOTSUPP; 1999 2000 /* f2fs only support ->fallocate for regular file */ 2001 if (!S_ISREG(inode->i_mode)) 2002 return -EINVAL; 2003 2004 if (IS_ENCRYPTED(inode) && 2005 (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) 2006 return -EOPNOTSUPP; 2007 2008 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 2009 
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | 2010 FALLOC_FL_INSERT_RANGE)) 2011 return -EOPNOTSUPP; 2012 2013 inode_lock(inode); 2014 2015 /* 2016 * Pinned file should not support partial truncation since the block 2017 * can be used by applications. 2018 */ 2019 if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && 2020 (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | 2021 FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) { 2022 ret = -EOPNOTSUPP; 2023 goto out; 2024 } 2025 2026 ret = file_modified(file); 2027 if (ret) 2028 goto out; 2029 2030 /* 2031 * wait for inflight dio, blocks should be removed after IO 2032 * completion. 2033 */ 2034 inode_dio_wait(inode); 2035 2036 if (mode & FALLOC_FL_PUNCH_HOLE) { 2037 if (offset >= inode->i_size) 2038 goto out; 2039 2040 ret = f2fs_punch_hole(inode, offset, len); 2041 } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { 2042 ret = f2fs_collapse_range(inode, offset, len); 2043 } else if (mode & FALLOC_FL_ZERO_RANGE) { 2044 ret = f2fs_zero_range(inode, offset, len, mode); 2045 } else if (mode & FALLOC_FL_INSERT_RANGE) { 2046 ret = f2fs_insert_range(inode, offset, len); 2047 } else { 2048 ret = f2fs_expand_inode_data(inode, offset, len, mode); 2049 } 2050 2051 if (!ret) { 2052 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 2053 f2fs_mark_inode_dirty_sync(inode, false); 2054 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2055 } 2056 2057 out: 2058 inode_unlock(inode); 2059 2060 trace_f2fs_fallocate(inode, mode, offset, len, ret); 2061 return ret; 2062 } 2063 2064 static int f2fs_release_file(struct inode *inode, struct file *filp) 2065 { 2066 if (atomic_dec_and_test(&F2FS_I(inode)->open_count)) 2067 f2fs_remove_donate_inode(inode); 2068 2069 /* 2070 * f2fs_release_file is called at every close calls. So we should 2071 * not drop any inmemory pages by close called by other process. 
2072 */ 2073 if (!(filp->f_mode & FMODE_WRITE) || 2074 atomic_read(&inode->i_writecount) != 1) 2075 return 0; 2076 2077 inode_lock(inode); 2078 f2fs_abort_atomic_write(inode, true); 2079 inode_unlock(inode); 2080 2081 return 0; 2082 } 2083 2084 static int f2fs_file_flush(struct file *file, fl_owner_t id) 2085 { 2086 struct inode *inode = file_inode(file); 2087 2088 /* 2089 * If the process doing a transaction is crashed, we should do 2090 * roll-back. Otherwise, other reader/write can see corrupted database 2091 * until all the writers close its file. Since this should be done 2092 * before dropping file lock, it needs to do in ->flush. 2093 */ 2094 if (F2FS_I(inode)->atomic_write_task == current && 2095 (current->flags & PF_EXITING)) { 2096 inode_lock(inode); 2097 f2fs_abort_atomic_write(inode, true); 2098 inode_unlock(inode); 2099 } 2100 2101 return 0; 2102 } 2103 2104 static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) 2105 { 2106 struct f2fs_inode_info *fi = F2FS_I(inode); 2107 u32 masked_flags = fi->i_flags & mask; 2108 2109 /* mask can be shrunk by flags_valid selector */ 2110 iflags &= mask; 2111 2112 /* Is it quota file? 
Do not allow user to mess with it */ 2113 if (IS_NOQUOTA(inode)) 2114 return -EPERM; 2115 2116 if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) { 2117 if (!f2fs_sb_has_casefold(F2FS_I_SB(inode))) 2118 return -EOPNOTSUPP; 2119 if (!f2fs_empty_dir(inode)) 2120 return -ENOTEMPTY; 2121 } 2122 2123 if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) { 2124 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 2125 return -EOPNOTSUPP; 2126 if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL)) 2127 return -EINVAL; 2128 } 2129 2130 if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { 2131 if (masked_flags & F2FS_COMPR_FL) { 2132 if (!f2fs_disable_compressed_file(inode)) 2133 return -EINVAL; 2134 } else { 2135 /* try to convert inline_data to support compression */ 2136 int err = f2fs_convert_inline_inode(inode); 2137 if (err) 2138 return err; 2139 2140 f2fs_down_write(&fi->i_sem); 2141 if (!f2fs_may_compress(inode) || 2142 atomic_read(&fi->writeback) || 2143 (S_ISREG(inode->i_mode) && 2144 F2FS_HAS_BLOCKS(inode))) { 2145 f2fs_up_write(&fi->i_sem); 2146 return -EINVAL; 2147 } 2148 err = set_compress_context(inode); 2149 f2fs_up_write(&fi->i_sem); 2150 2151 if (err) 2152 return err; 2153 } 2154 } 2155 2156 fi->i_flags = iflags | (fi->i_flags & ~mask); 2157 f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) && 2158 (fi->i_flags & F2FS_NOCOMP_FL)); 2159 2160 if (fi->i_flags & F2FS_PROJINHERIT_FL) 2161 set_inode_flag(inode, FI_PROJ_INHERIT); 2162 else 2163 clear_inode_flag(inode, FI_PROJ_INHERIT); 2164 2165 inode_set_ctime_current(inode); 2166 f2fs_set_inode_flags(inode); 2167 f2fs_mark_inode_dirty_sync(inode, true); 2168 return 0; 2169 } 2170 2171 /* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */ 2172 2173 /* 2174 * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry 2175 * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to 2176 * F2FS_GETTABLE_FS_FL. 
To also make it settable via FS_IOC_SETFLAGS, also add 2177 * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL. 2178 * 2179 * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and 2180 * FS_IOC_FSSETXATTR is done by the VFS. 2181 */ 2182 2183 static const struct { 2184 u32 iflag; 2185 u32 fsflag; 2186 } f2fs_fsflags_map[] = { 2187 { F2FS_COMPR_FL, FS_COMPR_FL }, 2188 { F2FS_SYNC_FL, FS_SYNC_FL }, 2189 { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, 2190 { F2FS_APPEND_FL, FS_APPEND_FL }, 2191 { F2FS_NODUMP_FL, FS_NODUMP_FL }, 2192 { F2FS_NOATIME_FL, FS_NOATIME_FL }, 2193 { F2FS_NOCOMP_FL, FS_NOCOMP_FL }, 2194 { F2FS_INDEX_FL, FS_INDEX_FL }, 2195 { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, 2196 { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, 2197 { F2FS_CASEFOLD_FL, FS_CASEFOLD_FL }, 2198 }; 2199 2200 #define F2FS_GETTABLE_FS_FL ( \ 2201 FS_COMPR_FL | \ 2202 FS_SYNC_FL | \ 2203 FS_IMMUTABLE_FL | \ 2204 FS_APPEND_FL | \ 2205 FS_NODUMP_FL | \ 2206 FS_NOATIME_FL | \ 2207 FS_NOCOMP_FL | \ 2208 FS_INDEX_FL | \ 2209 FS_DIRSYNC_FL | \ 2210 FS_PROJINHERIT_FL | \ 2211 FS_ENCRYPT_FL | \ 2212 FS_INLINE_DATA_FL | \ 2213 FS_NOCOW_FL | \ 2214 FS_VERITY_FL | \ 2215 FS_CASEFOLD_FL) 2216 2217 #define F2FS_SETTABLE_FS_FL ( \ 2218 FS_COMPR_FL | \ 2219 FS_SYNC_FL | \ 2220 FS_IMMUTABLE_FL | \ 2221 FS_APPEND_FL | \ 2222 FS_NODUMP_FL | \ 2223 FS_NOATIME_FL | \ 2224 FS_NOCOMP_FL | \ 2225 FS_DIRSYNC_FL | \ 2226 FS_PROJINHERIT_FL | \ 2227 FS_CASEFOLD_FL) 2228 2229 /* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */ 2230 static inline u32 f2fs_iflags_to_fsflags(u32 iflags) 2231 { 2232 u32 fsflags = 0; 2233 int i; 2234 2235 for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) 2236 if (iflags & f2fs_fsflags_map[i].iflag) 2237 fsflags |= f2fs_fsflags_map[i].fsflag; 2238 2239 return fsflags; 2240 } 2241 2242 /* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */ 2243 static inline u32 f2fs_fsflags_to_iflags(u32 fsflags) 2244 { 2245 u32 iflags = 0; 2246 int i; 2247 2248 for (i = 0; i < 
ARRAY_SIZE(f2fs_fsflags_map); i++) 2249 if (fsflags & f2fs_fsflags_map[i].fsflag) 2250 iflags |= f2fs_fsflags_map[i].iflag; 2251 2252 return iflags; 2253 } 2254 2255 static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) 2256 { 2257 struct inode *inode = file_inode(filp); 2258 2259 return put_user(inode->i_generation, (int __user *)arg); 2260 } 2261 2262 static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) 2263 { 2264 struct inode *inode = file_inode(filp); 2265 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2266 struct f2fs_inode_info *fi = F2FS_I(inode); 2267 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2268 loff_t isize; 2269 int ret; 2270 2271 if (!(filp->f_mode & FMODE_WRITE)) 2272 return -EBADF; 2273 2274 if (!inode_owner_or_capable(idmap, inode)) 2275 return -EACCES; 2276 2277 if (!S_ISREG(inode->i_mode)) 2278 return -EINVAL; 2279 2280 if (filp->f_flags & O_DIRECT) 2281 return -EINVAL; 2282 2283 ret = mnt_want_write_file(filp); 2284 if (ret) 2285 return ret; 2286 2287 inode_lock(inode); 2288 2289 if (!f2fs_disable_compressed_file(inode) || 2290 f2fs_is_pinned_file(inode)) { 2291 ret = -EINVAL; 2292 goto out; 2293 } 2294 2295 if (f2fs_is_atomic_file(inode)) 2296 goto out; 2297 2298 ret = f2fs_convert_inline_inode(inode); 2299 if (ret) 2300 goto out; 2301 2302 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 2303 f2fs_down_write(&fi->i_gc_rwsem[READ]); 2304 2305 /* 2306 * Should wait end_io to count F2FS_WB_CP_DATA correctly by 2307 * f2fs_is_atomic_file. 
2308 */ 2309 if (get_dirty_pages(inode)) 2310 f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%llu, npages=%u", 2311 inode->i_ino, get_dirty_pages(inode)); 2312 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 2313 if (ret) 2314 goto out_unlock; 2315 2316 /* Check if the inode already has a COW inode */ 2317 if (fi->cow_inode == NULL) { 2318 /* Create a COW inode for atomic write */ 2319 struct dentry *dentry = file_dentry(filp); 2320 struct inode *dir = d_inode(dentry->d_parent); 2321 2322 ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode); 2323 if (ret) 2324 goto out_unlock; 2325 2326 set_inode_flag(fi->cow_inode, FI_COW_FILE); 2327 clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); 2328 2329 /* Set the COW inode's atomic_inode to the atomic inode */ 2330 F2FS_I(fi->cow_inode)->atomic_inode = inode; 2331 } else { 2332 /* Reuse the already created COW inode */ 2333 f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode)); 2334 2335 invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1); 2336 2337 ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); 2338 if (ret) 2339 goto out_unlock; 2340 } 2341 2342 f2fs_write_inode(inode, NULL); 2343 2344 stat_inc_atomic_inode(inode); 2345 2346 set_inode_flag(inode, FI_ATOMIC_FILE); 2347 2348 isize = i_size_read(inode); 2349 fi->original_i_size = isize; 2350 if (truncate) { 2351 set_inode_flag(inode, FI_ATOMIC_REPLACE); 2352 truncate_inode_pages_final(inode->i_mapping); 2353 f2fs_i_size_write(inode, 0); 2354 isize = 0; 2355 } 2356 f2fs_i_size_write(fi->cow_inode, isize); 2357 2358 out_unlock: 2359 f2fs_up_write(&fi->i_gc_rwsem[READ]); 2360 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 2361 if (ret) 2362 goto out; 2363 2364 f2fs_update_time(sbi, REQ_TIME); 2365 fi->atomic_write_task = current; 2366 stat_update_max_atomic_write(inode); 2367 fi->atomic_write_cnt = 0; 2368 out: 2369 inode_unlock(inode); 2370 mnt_drop_write_file(filp); 2371 return ret; 2372 } 2373 2374 static int f2fs_ioc_commit_atomic_write(struct 
/*
 * Abort the atomic write in progress on @filp's inode.
 *
 * The file must be open for writing (-EBADF otherwise) and the caller must
 * own the inode or be suitably capable (-EACCES otherwise). A failure of
 * mnt_want_write_file() is returned as is; otherwise the result is 0.
 */
static int f2fs_ioc_abort_atomic_write(struct file *filp)
{
	struct inode *inode = file_inode(filp);
	struct mnt_idmap *idmap = file_mnt_idmap(filp);
	int ret;

	if (!(filp->f_mode & FMODE_WRITE))
		return -EBADF;

	if (!inode_owner_or_capable(idmap, inode))
		return -EACCES;

	ret = mnt_want_write_file(filp);
	if (ret)
		return ret;

	inode_lock(inode);

	f2fs_abort_atomic_write(inode, true);

	inode_unlock(inode);

	mnt_drop_write_file(filp);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	/* ret is still 0 from the successful mnt_want_write_file() */
	return ret;
}

/*
 * Shut the filesystem down according to @flag (F2FS_GOING_DOWN_*).
 *
 * @sbi:       filesystem to shut down
 * @flag:      shutdown mode; unknown values yield -EINVAL
 * @readonly:  when true, skip stopping the GC/discard threads
 * @need_lock: when true, take sb->s_umount for write around the thread
 *             teardown; pass false if the caller already holds it
 *             (NOTE(review): presumed from usage - confirm against callers)
 *
 * -EIO from the checkpoint in the METASYNC and NEED_FSCK modes is turned
 * into success. The result is always reported through trace_f2fs_shutdown().
 */
int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag,
						bool readonly, bool need_lock)
{
	struct super_block *sb = sbi->sb;
	int ret = 0;

	switch (flag) {
	case F2FS_GOING_DOWN_FULLSYNC:
		/* freeze the bdev, stop checkpointing, then thaw again */
		ret = bdev_freeze(sb->s_bdev);
		if (ret)
			goto out;
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
		bdev_thaw(sb->s_bdev);
		break;
	case F2FS_GOING_DOWN_METASYNC:
		/* do checkpoint only */
		ret = f2fs_sync_fs(sb, 1);
		if (ret) {
			/* -EIO is not reported as a failure of the shutdown */
			if (ret == -EIO)
				ret = 0;
			goto out;
		}
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
		break;
	case F2FS_GOING_DOWN_NOSYNC:
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
		break;
	case F2FS_GOING_DOWN_METAFLUSH:
		f2fs_sync_meta_pages(sbi, LONG_MAX, FS_META_IO);
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
		break;
	case F2FS_GOING_DOWN_NEED_FSCK:
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
		set_sbi_flag(sbi, SBI_IS_DIRTY);
		/* do checkpoint only */
		ret = f2fs_sync_fs(sb, 1);
		if (ret == -EIO)
			ret = 0;
		/* this mode never stops the checkpoint or the threads below */
		goto out;
	default:
		ret = -EINVAL;
		goto out;
	}

	if (readonly)
		goto out;

	/*
	 * grab sb->s_umount to avoid racing w/ remount() and other shutdown
	 * paths.
	 */
	if (need_lock)
		down_write(&sbi->sb->s_umount);

	f2fs_stop_gc_thread(sbi);
	f2fs_stop_discard_thread(sbi);

	f2fs_drop_discard_cmd(sbi);
	clear_opt(sbi, DISCARD);

	if (need_lock)
		up_write(&sbi->sb->s_umount);

	f2fs_update_time(sbi, REQ_TIME);
out:

	trace_f2fs_shutdown(sbi, flag, ret);

	return ret;
}

/*
 * Shutdown ioctl handler: read the requested F2FS_GOING_DOWN_* mode from
 * the __u32 at @arg and hand it to f2fs_do_shutdown().
 *
 * Requires CAP_SYS_ADMIN (-EPERM). Every mode except FULLSYNC first takes
 * write access to the mount; if that fails with -EROFS the request is
 * downgraded to a NOSYNC shutdown flagged as readonly.
 */
static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	__u32 in;
	int ret;
	bool need_drop = false, readonly = false;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (get_user(in, (__u32 __user *)arg))
		return -EFAULT;

	if (in != F2FS_GOING_DOWN_FULLSYNC) {
		ret = mnt_want_write_file(filp);
		if (ret) {
			if (ret != -EROFS)
				return ret;

			/* fallback to nosync shutdown for readonly fs */
			in = F2FS_GOING_DOWN_NOSYNC;
			readonly = true;
		} else {
			/* remember to pair the successful want_write */
			need_drop = true;
		}
	}

	ret = f2fs_do_shutdown(sbi, in, readonly, true);

	if (need_drop)
		mnt_drop_write_file(filp);

	return ret;
}
mnt_drop_write_file(filp); 2540 2541 return ret; 2542 } 2543 2544 static int f2fs_keep_noreuse_range(struct inode *inode, 2545 loff_t offset, loff_t len) 2546 { 2547 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2548 u64 max_bytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 2549 u64 start, end; 2550 int ret = 0; 2551 2552 if (!S_ISREG(inode->i_mode)) 2553 return 0; 2554 2555 if (offset >= max_bytes || len > max_bytes || 2556 (offset + len) > max_bytes) 2557 return 0; 2558 2559 start = offset >> PAGE_SHIFT; 2560 end = DIV_ROUND_UP(offset + len, PAGE_SIZE); 2561 2562 inode_lock(inode); 2563 if (f2fs_is_atomic_file(inode)) { 2564 inode_unlock(inode); 2565 return 0; 2566 } 2567 2568 spin_lock(&sbi->inode_lock[DONATE_INODE]); 2569 /* let's remove the range, if len = 0 */ 2570 if (!len) { 2571 if (!list_empty(&F2FS_I(inode)->gdonate_list)) { 2572 list_del_init(&F2FS_I(inode)->gdonate_list); 2573 sbi->donate_files--; 2574 if (is_inode_flag_set(inode, FI_DONATE_FINISHED)) 2575 ret = -EALREADY; 2576 else 2577 set_inode_flag(inode, FI_DONATE_FINISHED); 2578 } else 2579 ret = -ENOENT; 2580 } else { 2581 if (list_empty(&F2FS_I(inode)->gdonate_list)) { 2582 list_add_tail(&F2FS_I(inode)->gdonate_list, 2583 &sbi->inode_list[DONATE_INODE]); 2584 sbi->donate_files++; 2585 } else { 2586 list_move_tail(&F2FS_I(inode)->gdonate_list, 2587 &sbi->inode_list[DONATE_INODE]); 2588 } 2589 F2FS_I(inode)->donate_start = start; 2590 F2FS_I(inode)->donate_end = end - 1; 2591 clear_inode_flag(inode, FI_DONATE_FINISHED); 2592 } 2593 spin_unlock(&sbi->inode_lock[DONATE_INODE]); 2594 inode_unlock(inode); 2595 2596 return ret; 2597 } 2598 2599 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) 2600 { 2601 struct inode *inode = file_inode(filp); 2602 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2603 struct fstrim_range range; 2604 int ret; 2605 2606 if (!capable(CAP_SYS_ADMIN)) 2607 return -EPERM; 2608 2609 if (!f2fs_hw_support_discard(sbi)) 2610 return -EOPNOTSUPP; 2611 2612 if 
(copy_from_user(&range, (struct fstrim_range __user *)arg, 2613 sizeof(range))) 2614 return -EFAULT; 2615 2616 ret = mnt_want_write_file(filp); 2617 if (ret) 2618 return ret; 2619 2620 range.minlen = max_t(unsigned int, range.minlen, 2621 f2fs_hw_discard_granularity(sbi)); 2622 ret = f2fs_trim_fs(sbi, &range); 2623 mnt_drop_write_file(filp); 2624 if (ret < 0) 2625 return ret; 2626 2627 if (copy_to_user((struct fstrim_range __user *)arg, &range, 2628 sizeof(range))) 2629 return -EFAULT; 2630 f2fs_update_time(sbi, REQ_TIME); 2631 return 0; 2632 } 2633 2634 static bool uuid_is_nonzero(__u8 u[16]) 2635 { 2636 int i; 2637 2638 for (i = 0; i < 16; i++) 2639 if (u[i]) 2640 return true; 2641 return false; 2642 } 2643 2644 static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) 2645 { 2646 struct inode *inode = file_inode(filp); 2647 int ret; 2648 2649 if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode))) 2650 return -EOPNOTSUPP; 2651 2652 ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg); 2653 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2654 return ret; 2655 } 2656 2657 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) 2658 { 2659 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2660 return -EOPNOTSUPP; 2661 return fscrypt_ioctl_get_policy(filp, (void __user *)arg); 2662 } 2663 2664 static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) 2665 { 2666 struct inode *inode = file_inode(filp); 2667 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2668 u8 encrypt_pw_salt[16]; 2669 int err; 2670 2671 if (!f2fs_sb_has_encrypt(sbi)) 2672 return -EOPNOTSUPP; 2673 2674 err = mnt_want_write_file(filp); 2675 if (err) 2676 return err; 2677 2678 f2fs_down_write(&sbi->sb_lock); 2679 2680 if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) 2681 goto got_it; 2682 2683 /* update superblock with uuid */ 2684 generate_random_uuid(sbi->raw_super->encrypt_pw_salt); 2685 2686 err = f2fs_commit_super(sbi, 
false); 2687 if (err) { 2688 /* undo new data */ 2689 memset(sbi->raw_super->encrypt_pw_salt, 0, 16); 2690 goto out_err; 2691 } 2692 got_it: 2693 memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16); 2694 out_err: 2695 f2fs_up_write(&sbi->sb_lock); 2696 mnt_drop_write_file(filp); 2697 2698 if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16)) 2699 err = -EFAULT; 2700 2701 return err; 2702 } 2703 2704 static int f2fs_ioc_get_encryption_policy_ex(struct file *filp, 2705 unsigned long arg) 2706 { 2707 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2708 return -EOPNOTSUPP; 2709 2710 return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg); 2711 } 2712 2713 static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg) 2714 { 2715 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2716 return -EOPNOTSUPP; 2717 2718 return fscrypt_ioctl_add_key(filp, (void __user *)arg); 2719 } 2720 2721 static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg) 2722 { 2723 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2724 return -EOPNOTSUPP; 2725 2726 return fscrypt_ioctl_remove_key(filp, (void __user *)arg); 2727 } 2728 2729 static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp, 2730 unsigned long arg) 2731 { 2732 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2733 return -EOPNOTSUPP; 2734 2735 return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg); 2736 } 2737 2738 static int f2fs_ioc_get_encryption_key_status(struct file *filp, 2739 unsigned long arg) 2740 { 2741 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2742 return -EOPNOTSUPP; 2743 2744 return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); 2745 } 2746 2747 static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg) 2748 { 2749 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2750 return -EOPNOTSUPP; 2751 2752 return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); 
2753 } 2754 2755 static int f2fs_ioc_gc(struct file *filp, unsigned long arg) 2756 { 2757 struct inode *inode = file_inode(filp); 2758 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2759 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, 2760 .no_bg_gc = false, 2761 .should_migrate_blocks = false, 2762 .nr_free_secs = 0 }; 2763 __u32 sync; 2764 int ret; 2765 2766 if (!capable(CAP_SYS_ADMIN)) 2767 return -EPERM; 2768 2769 if (get_user(sync, (__u32 __user *)arg)) 2770 return -EFAULT; 2771 2772 if (f2fs_readonly(sbi->sb)) 2773 return -EROFS; 2774 2775 ret = mnt_want_write_file(filp); 2776 if (ret) 2777 return ret; 2778 2779 if (!sync) { 2780 if (!f2fs_down_write_trylock_trace(&sbi->gc_lock, 2781 &gc_control.lc)) { 2782 ret = -EBUSY; 2783 goto out; 2784 } 2785 } else { 2786 f2fs_down_write_trace(&sbi->gc_lock, &gc_control.lc); 2787 } 2788 2789 gc_control.init_gc_type = sync ? FG_GC : BG_GC; 2790 gc_control.err_gc_skipped = sync; 2791 stat_inc_gc_call_count(sbi, FOREGROUND); 2792 ret = f2fs_gc(sbi, &gc_control); 2793 out: 2794 mnt_drop_write_file(filp); 2795 return ret; 2796 } 2797 2798 static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range) 2799 { 2800 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); 2801 struct f2fs_gc_control gc_control = { 2802 .init_gc_type = range->sync ? 
FG_GC : BG_GC, 2803 .no_bg_gc = false, 2804 .should_migrate_blocks = false, 2805 .err_gc_skipped = range->sync, 2806 .nr_free_secs = 0 }; 2807 u64 end; 2808 int ret; 2809 2810 if (!capable(CAP_SYS_ADMIN)) 2811 return -EPERM; 2812 if (f2fs_readonly(sbi->sb)) 2813 return -EROFS; 2814 2815 end = range->start + range->len; 2816 if (end < range->start || range->start < MAIN_BLKADDR(sbi) || 2817 end >= MAX_BLKADDR(sbi)) 2818 return -EINVAL; 2819 2820 ret = mnt_want_write_file(filp); 2821 if (ret) 2822 return ret; 2823 2824 do_more: 2825 if (!range->sync) { 2826 if (!f2fs_down_write_trylock_trace(&sbi->gc_lock, &gc_control.lc)) { 2827 ret = -EBUSY; 2828 goto out; 2829 } 2830 } else { 2831 f2fs_down_write_trace(&sbi->gc_lock, &gc_control.lc); 2832 } 2833 2834 gc_control.victim_segno = GET_SEGNO(sbi, range->start); 2835 stat_inc_gc_call_count(sbi, FOREGROUND); 2836 ret = f2fs_gc(sbi, &gc_control); 2837 if (ret) { 2838 if (ret == -EBUSY) 2839 ret = -EAGAIN; 2840 goto out; 2841 } 2842 range->start += CAP_BLKS_PER_SEC(sbi); 2843 if (range->start <= end) 2844 goto do_more; 2845 out: 2846 mnt_drop_write_file(filp); 2847 return ret; 2848 } 2849 2850 static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) 2851 { 2852 struct f2fs_gc_range range; 2853 2854 if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, 2855 sizeof(range))) 2856 return -EFAULT; 2857 return __f2fs_ioc_gc_range(filp, &range); 2858 } 2859 2860 static int f2fs_ioc_write_checkpoint(struct file *filp) 2861 { 2862 struct inode *inode = file_inode(filp); 2863 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2864 int ret; 2865 2866 if (!capable(CAP_SYS_ADMIN)) 2867 return -EPERM; 2868 2869 if (f2fs_readonly(sbi->sb)) 2870 return -EROFS; 2871 2872 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 2873 f2fs_info(sbi, "Skipping Checkpoint. 
Checkpoints currently disabled."); 2874 return -EINVAL; 2875 } 2876 2877 ret = mnt_want_write_file(filp); 2878 if (ret) 2879 return ret; 2880 2881 ret = f2fs_sync_fs(sbi->sb, 1); 2882 2883 mnt_drop_write_file(filp); 2884 return ret; 2885 } 2886 2887 static int f2fs_defragment_range(struct f2fs_sb_info *sbi, 2888 struct file *filp, 2889 struct f2fs_defragment *range) 2890 { 2891 struct inode *inode = file_inode(filp); 2892 struct f2fs_map_blocks map = { .m_next_extent = NULL, 2893 .m_seg_type = NO_CHECK_TYPE, 2894 .m_may_create = false }; 2895 struct extent_info ei = {}; 2896 pgoff_t pg_start, pg_end, next_pgofs; 2897 unsigned int total = 0, sec_num; 2898 block_t blk_end = 0; 2899 bool fragmented = false; 2900 int err; 2901 2902 f2fs_balance_fs(sbi, true); 2903 2904 inode_lock(inode); 2905 pg_start = range->start >> PAGE_SHIFT; 2906 pg_end = min_t(pgoff_t, 2907 (range->start + range->len) >> PAGE_SHIFT, 2908 DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE)); 2909 2910 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) || 2911 f2fs_is_atomic_file(inode)) { 2912 err = -EINVAL; 2913 goto unlock_out; 2914 } 2915 2916 /* if in-place-update policy is enabled, don't waste time here */ 2917 set_inode_flag(inode, FI_OPU_WRITE); 2918 if (f2fs_should_update_inplace(inode, NULL)) { 2919 err = -EINVAL; 2920 goto out; 2921 } 2922 2923 /* writeback all dirty pages in the range */ 2924 err = filemap_write_and_wait_range(inode->i_mapping, 2925 pg_start << PAGE_SHIFT, 2926 (pg_end << PAGE_SHIFT) - 1); 2927 if (err) 2928 goto out; 2929 2930 /* 2931 * lookup mapping info in extent cache, skip defragmenting if physical 2932 * block addresses are continuous. 
2933 */ 2934 if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) { 2935 if ((pgoff_t)ei.fofs + ei.len >= pg_end) 2936 goto out; 2937 } 2938 2939 map.m_lblk = pg_start; 2940 map.m_next_pgofs = &next_pgofs; 2941 2942 /* 2943 * lookup mapping info in dnode page cache, skip defragmenting if all 2944 * physical block addresses are continuous even if there are hole(s) 2945 * in logical blocks. 2946 */ 2947 while (map.m_lblk < pg_end) { 2948 map.m_len = pg_end - map.m_lblk; 2949 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT); 2950 if (err) 2951 goto out; 2952 2953 if (!(map.m_flags & F2FS_MAP_FLAGS)) { 2954 map.m_lblk = next_pgofs; 2955 continue; 2956 } 2957 2958 if (blk_end && blk_end != map.m_pblk) 2959 fragmented = true; 2960 2961 /* record total count of block that we're going to move */ 2962 total += map.m_len; 2963 2964 blk_end = map.m_pblk + map.m_len; 2965 2966 map.m_lblk += map.m_len; 2967 } 2968 2969 if (!fragmented) { 2970 total = 0; 2971 goto out; 2972 } 2973 2974 sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi)); 2975 2976 /* 2977 * make sure there are enough free section for LFS allocation, this can 2978 * avoid defragment running in SSR mode when free section are allocated 2979 * intensively 2980 */ 2981 if (has_not_enough_free_secs(sbi, 0, sec_num)) { 2982 err = -EAGAIN; 2983 goto out; 2984 } 2985 2986 map.m_lblk = pg_start; 2987 map.m_len = pg_end - pg_start; 2988 total = 0; 2989 2990 while (map.m_lblk < pg_end) { 2991 pgoff_t idx; 2992 int cnt = 0; 2993 2994 do_map: 2995 map.m_len = pg_end - map.m_lblk; 2996 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT); 2997 if (err) 2998 goto clear_out; 2999 3000 if (!(map.m_flags & F2FS_MAP_FLAGS)) { 3001 map.m_lblk = next_pgofs; 3002 goto check; 3003 } 3004 3005 set_inode_flag(inode, FI_SKIP_WRITES); 3006 3007 idx = map.m_lblk; 3008 while (idx < map.m_lblk + map.m_len && 3009 cnt < BLKS_PER_SEG(sbi)) { 3010 struct folio *folio; 3011 3012 folio = f2fs_get_lock_data_folio(inode, idx, 
true); 3013 if (IS_ERR(folio)) { 3014 err = PTR_ERR(folio); 3015 goto clear_out; 3016 } 3017 3018 f2fs_folio_wait_writeback(folio, DATA, true, true); 3019 3020 folio_mark_dirty(folio); 3021 folio_set_f2fs_gcing(folio); 3022 f2fs_folio_put(folio, true); 3023 3024 idx++; 3025 cnt++; 3026 total++; 3027 } 3028 3029 map.m_lblk = idx; 3030 check: 3031 if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi)) 3032 goto do_map; 3033 3034 clear_inode_flag(inode, FI_SKIP_WRITES); 3035 3036 err = filemap_fdatawrite(inode->i_mapping); 3037 if (err) 3038 goto out; 3039 } 3040 clear_out: 3041 clear_inode_flag(inode, FI_SKIP_WRITES); 3042 out: 3043 clear_inode_flag(inode, FI_OPU_WRITE); 3044 unlock_out: 3045 inode_unlock(inode); 3046 if (!err) 3047 range->len = (u64)total << PAGE_SHIFT; 3048 return err; 3049 } 3050 3051 static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) 3052 { 3053 struct inode *inode = file_inode(filp); 3054 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3055 struct f2fs_defragment range; 3056 int err; 3057 3058 if (!capable(CAP_SYS_ADMIN)) 3059 return -EPERM; 3060 3061 if (!S_ISREG(inode->i_mode)) 3062 return -EINVAL; 3063 3064 if (f2fs_readonly(sbi->sb)) 3065 return -EROFS; 3066 3067 if (copy_from_user(&range, (struct f2fs_defragment __user *)arg, 3068 sizeof(range))) 3069 return -EFAULT; 3070 3071 /* verify alignment of offset & size */ 3072 if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1)) 3073 return -EINVAL; 3074 3075 if (unlikely((range.start + range.len) >> PAGE_SHIFT > 3076 max_file_blocks(inode))) 3077 return -EINVAL; 3078 3079 err = mnt_want_write_file(filp); 3080 if (err) 3081 return err; 3082 3083 err = f2fs_defragment_range(sbi, filp, &range); 3084 mnt_drop_write_file(filp); 3085 3086 if (range.len) 3087 f2fs_update_time(sbi, REQ_TIME); 3088 if (err < 0) 3089 return err; 3090 3091 if (copy_to_user((struct f2fs_defragment __user *)arg, &range, 3092 sizeof(range))) 3093 return -EFAULT; 3094 3095 return 0; 3096 } 3097 
/*
 * Move @len bytes of block mappings from @file_in at @pos_in to @file_out
 * at @pos_out via __exchange_data_block().
 *
 * Both files must be regular, unencrypted, on the same mount and
 * superblock, and neither compressed, pinned nor atomic. @pos_in,
 * @pos_in + @len and @pos_out must be F2FS_BLKSIZE aligned, except that a
 * range ending exactly at the source i_size is rounded up to the block
 * boundary. @len == 0 means "up to the end of the source file".
 *
 * Returns 0 on success, -EXDEV, -EROFS, -EINVAL, -EOPNOTSUPP, -EBUSY (the
 * destination inode lock or its i_gc_rwsem could not be taken without
 * blocking), or an error from the helpers called.
 */
static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
			struct file *file_out, loff_t pos_out, size_t len)
{
	struct inode *src = file_inode(file_in);
	struct inode *dst = file_inode(file_out);
	struct f2fs_sb_info *sbi = F2FS_I_SB(src);
	struct f2fs_lock_context lc;
	size_t olen = len, dst_max_i_size = 0;
	size_t dst_osize;
	int ret;

	if (file_in->f_path.mnt != file_out->f_path.mnt ||
				src->i_sb != dst->i_sb)
		return -EXDEV;

	if (unlikely(f2fs_readonly(src->i_sb)))
		return -EROFS;

	if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
		return -EINVAL;

	if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst))
		return -EOPNOTSUPP;

	if (pos_out < 0 || pos_in < 0)
		return -EINVAL;

	if (src == dst) {
		/* moving a range onto itself is a no-op */
		if (pos_in == pos_out)
			return 0;
		/* destination starting inside the source range is rejected */
		if (pos_out > pos_in && pos_out < pos_in + len)
			return -EINVAL;
	}

	/* src is locked unconditionally, dst only with a trylock */
	inode_lock(src);
	if (src != dst) {
		ret = -EBUSY;
		if (!inode_trylock(dst))
			goto out;
	}

	if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) ||
		f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = -EINVAL;
	if (pos_in + len > src->i_size || pos_in + len < pos_in)
		goto out_unlock;
	if (len == 0)
		olen = len = src->i_size - pos_in;
	/* a range reaching EOF is extended to the end of its last block */
	if (pos_in + len == src->i_size)
		len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in;
	if (len == 0) {
		ret = 0;
		goto out_unlock;
	}

	/* olen is the caller-visible length, before the EOF rounding */
	dst_osize = dst->i_size;
	if (pos_out + olen > dst->i_size)
		dst_max_i_size = pos_out + olen;

	/* verify the end result is block aligned */
	if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) ||
			!IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) ||
			!IS_ALIGNED(pos_out, F2FS_BLKSIZE))
		goto out_unlock;

	ret = f2fs_convert_inline_inode(src);
	if (ret)
		goto out_unlock;

	ret = f2fs_convert_inline_inode(dst);
	if (ret)
		goto out_unlock;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(src->i_mapping,
					pos_in, pos_in + len);
	if (ret)
		goto out_unlock;

	ret = filemap_write_and_wait_range(dst->i_mapping,
					pos_out, pos_out + len);
	if (ret)
		goto out_unlock;

	f2fs_balance_fs(sbi, true);

	/* same pattern as the inode locks: block on src, trylock dst */
	f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	if (src != dst) {
		ret = -EBUSY;
		if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
			goto out_src;
	}

	f2fs_lock_op(sbi, &lc);
	ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in),
				F2FS_BYTES_TO_BLK(pos_out),
				F2FS_BYTES_TO_BLK(len), false);

	if (!ret) {
		/* grow dst to the new end, or restore its previous size */
		if (dst_max_i_size)
			f2fs_i_size_write(dst, dst_max_i_size);
		else if (dst_osize != dst->i_size)
			f2fs_i_size_write(dst, dst_osize);
	}
	f2fs_unlock_op(sbi, &lc);

	if (src != dst)
		f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
out_src:
	f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	if (ret)
		goto out_unlock;

	inode_set_mtime_to_ts(src, inode_set_ctime_current(src));
	f2fs_mark_inode_dirty_sync(src, false);
	if (src != dst) {
		inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst));
		f2fs_mark_inode_dirty_sync(dst, false);
	}
	f2fs_update_time(sbi, REQ_TIME);

out_unlock:
	if (src != dst)
		inode_unlock(dst);
out:
	inode_unlock(src);
	return ret;
}

/*
 * Resolve @range->dst_fd and move the described range from @filp into it.
 *
 * @filp must be open for both reading and writing and the destination for
 * writing, otherwise -EBADF. The destination fd reference is scoped by
 * CLASS(fd, ...), so no explicit release appears on the return paths.
 */
static int __f2fs_ioc_move_range(struct file *filp,
				struct f2fs_move_range *range)
{
	int err;

	if (!(filp->f_mode & FMODE_READ) ||
			!(filp->f_mode & FMODE_WRITE))
		return -EBADF;

	CLASS(fd, dst)(range->dst_fd);
	if (fd_empty(dst))
		return -EBADF;

	if (!(fd_file(dst)->f_mode & FMODE_WRITE))
		return -EBADF;

	err = mnt_want_write_file(filp);
	if (err)
		return err;

	err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst),
					range->pos_out, range->len);

	mnt_drop_write_file(filp);
	return err;
}
/* Copy a struct f2fs_move_range from userspace and perform the move. */
static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
{
	struct f2fs_move_range range;

	if (copy_from_user(&range, (struct f2fs_move_range __user *)arg,
							sizeof(range)))
		return -EFAULT;
	return __f2fs_ioc_move_range(filp, &range);
}

/*
 * Run foreground GC with block migration over up to @range.segments
 * segments of device @range.dev_num, resuming from
 * last_victim[FLUSH_DEVICE] when that lies inside the device.
 *
 * Requires CAP_SYS_ADMIN (-EPERM) and a writable filesystem (-EROFS).
 * Returns -EINVAL while checkpoints are disabled, on single-device or
 * large-section filesystems, or when dev_num is not below s_ndevs - 1;
 * -EBUSY when gc_lock is contended. -EAGAIN from f2fs_gc() is treated as
 * success for that segment; any other negative result ends the walk.
 */
static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct sit_info *sm = SIT_I(sbi);
	unsigned int start_segno = 0, end_segno = 0;
	unsigned int dev_start_segno = 0, dev_end_segno = 0;
	struct f2fs_flush_device range;
	struct f2fs_gc_control gc_control = {
			.init_gc_type = FG_GC,
			.should_migrate_blocks = true,
			.err_gc_skipped = true,
			.nr_free_secs = 0 };
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (f2fs_readonly(sbi->sb))
		return -EROFS;

	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return -EINVAL;

	if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
							sizeof(range)))
		return -EFAULT;

	/* the last device (index s_ndevs - 1) is rejected as well */
	if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
			__is_large_section(sbi)) {
		f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1",
			  range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi));
		return -EINVAL;
	}

	ret = mnt_want_write_file(filp);
	if (ret)
		return ret;

	/* device 0 keeps dev_start_segno at its initial 0 */
	if (range.dev_num != 0)
		dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk);
	dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk);

	start_segno = sm->last_victim[FLUSH_DEVICE];
	if (start_segno < dev_start_segno || start_segno >= dev_end_segno)
		start_segno = dev_start_segno;
	end_segno = min(start_segno + range.segments, dev_end_segno);

	while (start_segno < end_segno) {
		if (!f2fs_down_write_trylock_trace(&sbi->gc_lock, &gc_control.lc)) {
			ret = -EBUSY;
			goto out;
		}
		/* move the other victim cursors past the range being flushed */
		sm->last_victim[GC_CB] = end_segno + 1;
		sm->last_victim[GC_GREEDY] = end_segno + 1;
		sm->last_victim[ALLOC_NEXT] = end_segno + 1;

		gc_control.victim_segno = start_segno;
		stat_inc_gc_call_count(sbi, FOREGROUND);
		ret = f2fs_gc(sbi, &gc_control);
		if (ret == -EAGAIN)
			ret = 0;
		else if (ret < 0)
			break;
		start_segno++;
	}
out:
	mnt_drop_write_file(filp);
	return ret;
}

/*
 * Report the superblock feature mask to the u32 at @arg, with
 * F2FS_FEATURE_ATOMIC_WRITE always set.
 */
static int f2fs_ioc_get_features(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature);

	/* Must validate to set it with SQLite behavior in Android. */
	sb_feature |= F2FS_FEATURE_ATOMIC_WRITE;

	return put_user(sb_feature, (u32 __user *)arg);
}
*/ 3350 sb_feature |= F2FS_FEATURE_ATOMIC_WRITE; 3351 3352 return put_user(sb_feature, (u32 __user *)arg); 3353 } 3354 3355 #ifdef CONFIG_QUOTA 3356 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) 3357 { 3358 struct dquot *transfer_to[MAXQUOTAS] = {}; 3359 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3360 struct super_block *sb = sbi->sb; 3361 int err; 3362 3363 transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); 3364 if (IS_ERR(transfer_to[PRJQUOTA])) 3365 return PTR_ERR(transfer_to[PRJQUOTA]); 3366 3367 err = __dquot_transfer(inode, transfer_to); 3368 if (err) 3369 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); 3370 dqput(transfer_to[PRJQUOTA]); 3371 return err; 3372 } 3373 3374 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) 3375 { 3376 struct f2fs_inode_info *fi = F2FS_I(inode); 3377 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3378 struct f2fs_inode *ri = NULL; 3379 struct f2fs_lock_context lc; 3380 kprojid_t kprojid; 3381 int err; 3382 3383 if (!f2fs_sb_has_project_quota(sbi)) { 3384 if (projid != F2FS_DEF_PROJID) 3385 return -EOPNOTSUPP; 3386 else 3387 return 0; 3388 } 3389 3390 if (!f2fs_has_extra_attr(inode)) 3391 return -EOPNOTSUPP; 3392 3393 kprojid = make_kprojid(&init_user_ns, (projid_t)projid); 3394 3395 if (projid_eq(kprojid, fi->i_projid)) 3396 return 0; 3397 3398 err = -EPERM; 3399 /* Is it quota file? 
Do not allow user to mess with it */ 3400 if (IS_NOQUOTA(inode)) 3401 return err; 3402 3403 if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) 3404 return -EOVERFLOW; 3405 3406 err = f2fs_dquot_initialize(inode); 3407 if (err) 3408 return err; 3409 3410 f2fs_lock_op(sbi, &lc); 3411 err = f2fs_transfer_project_quota(inode, kprojid); 3412 if (err) 3413 goto out_unlock; 3414 3415 fi->i_projid = kprojid; 3416 inode_set_ctime_current(inode); 3417 f2fs_mark_inode_dirty_sync(inode, true); 3418 out_unlock: 3419 f2fs_unlock_op(sbi, &lc); 3420 return err; 3421 } 3422 #else 3423 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) 3424 { 3425 return 0; 3426 } 3427 3428 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) 3429 { 3430 if (projid != F2FS_DEF_PROJID) 3431 return -EOPNOTSUPP; 3432 return 0; 3433 } 3434 #endif 3435 3436 int f2fs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) 3437 { 3438 struct inode *inode = d_inode(dentry); 3439 struct f2fs_inode_info *fi = F2FS_I(inode); 3440 u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags); 3441 3442 if (IS_ENCRYPTED(inode)) 3443 fsflags |= FS_ENCRYPT_FL; 3444 if (IS_VERITY(inode)) 3445 fsflags |= FS_VERITY_FL; 3446 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) 3447 fsflags |= FS_INLINE_DATA_FL; 3448 if (is_inode_flag_set(inode, FI_PIN_FILE)) 3449 fsflags |= FS_NOCOW_FL; 3450 3451 fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL); 3452 3453 if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) 3454 fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid); 3455 3456 return 0; 3457 } 3458 3459 int f2fs_fileattr_set(struct mnt_idmap *idmap, 3460 struct dentry *dentry, struct file_kattr *fa) 3461 { 3462 struct inode *inode = d_inode(dentry); 3463 u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL; 3464 u32 iflags; 3465 int err; 3466 3467 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 3468 return -EIO; 3469 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) 3470 
/*
 * Check whether @inode may stay pinned, optionally counting one more GC
 * failure against it.
 *
 * Returns -EINVAL for device-aliasing inodes. Once i_gc_failures has
 * reached sbi->gc_pin_file_threshold the pin flag is cleared, a warning is
 * logged and -EAGAIN is returned. Otherwise returns 0, after incrementing
 * i_gc_failures when @inc is true.
 */
int f2fs_pin_file_control(struct inode *inode, bool inc)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (IS_DEVICE_ALIASING(inode))
		return -EINVAL;

	if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) {
		f2fs_warn(sbi, "%s: Enable GC = ino %llx after %x GC trials",
			  __func__, inode->i_ino, fi->i_gc_failures);
		clear_inode_flag(inode, FI_PIN_FILE);
		return -EAGAIN;
	}

	/* Use i_gc_failures for normal file as a risk signal. */
	if (inc)
		f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1);

	return 0;
}

/*
 * Pin or unpin the regular file behind @filp according to the __u32 at
 * @arg (non-zero pins).
 *
 * Unpinning clears FI_PIN_FILE and resets i_gc_failures; it is refused
 * with -EOPNOTSUPP for device-aliasing inodes. Pinning requires a file
 * without blocks (-EFBIG otherwise) and returns the current i_gc_failures
 * count on success, so a successful pin may return a positive value.
 *
 * Other errors: -EFAULT, -EINVAL (not a regular file, atomic file, or
 * out-of-place update required on a non-zoned device), -EROFS, -EAGAIN
 * (f2fs_pin_file_control() refused), -EOPNOTSUPP (compression could not be
 * disabled), or an error from inline conversion.
 */
static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	__u32 pin;
	int ret = 0;

	if (get_user(pin, (__u32 __user *)arg))
		return -EFAULT;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (f2fs_readonly(sbi->sb))
		return -EROFS;

	if (!pin && IS_DEVICE_ALIASING(inode))
		return -EOPNOTSUPP;

	ret = mnt_want_write_file(filp);
	if (ret)
		return ret;

	inode_lock(inode);

	if (f2fs_is_atomic_file(inode)) {
		ret = -EINVAL;
		goto out;
	}

	if (!pin) {
		clear_inode_flag(inode, FI_PIN_FILE);
		f2fs_i_gc_failures_write(inode, 0);
		goto done;
	} else if (f2fs_is_pinned_file(inode)) {
		/* already pinned: nothing to change, ret stays 0 */
		goto done;
	}

	if (F2FS_HAS_BLOCKS(inode)) {
		ret = -EFBIG;
		goto out;
	}

	/* Let's allow file pinning on zoned device. */
	if (!f2fs_sb_has_blkzoned(sbi) &&
	    f2fs_should_update_outplace(inode, NULL)) {
		ret = -EINVAL;
		goto out;
	}

	/* any failure here, including -EINVAL, is reported as -EAGAIN */
	if (f2fs_pin_file_control(inode, false)) {
		ret = -EAGAIN;
		goto out;
	}

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		goto out;

	if (!f2fs_disable_compressed_file(inode)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	set_inode_flag(inode, FI_PIN_FILE);
	ret = F2FS_I(inode)->i_gc_failures;
done:
	f2fs_update_time(sbi, REQ_TIME);
out:
	inode_unlock(inode);
	mnt_drop_write_file(filp);
	return ret;
}

/*
 * Report the pin state to the u32 at @arg: i_gc_failures when the file is
 * pinned, 0 otherwise. A pinned file with no recorded GC failures therefore
 * also reports 0.
 */
static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	__u32 pin = 0;

	if (is_inode_flag_set(inode, FI_PIN_FILE))
		pin = F2FS_I(inode)->i_gc_failures;
	return put_user(pin, (u32 __user *)arg);
}

/* Write 1 to the u32 at @arg if the inode is device-aliasing, else 0. */
static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg)
{
	return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 1 : 0,
			(u32 __user *)arg);
}
1 : 0, 3597 (u32 __user *)arg); 3598 } 3599 3600 static int f2fs_ioc_io_prio(struct file *filp, unsigned long arg) 3601 { 3602 struct inode *inode = file_inode(filp); 3603 __u32 level; 3604 3605 if (get_user(level, (__u32 __user *)arg)) 3606 return -EFAULT; 3607 3608 if (!S_ISREG(inode->i_mode) || level >= F2FS_IOPRIO_MAX) 3609 return -EINVAL; 3610 3611 inode_lock(inode); 3612 F2FS_I(inode)->ioprio_hint = level; 3613 inode_unlock(inode); 3614 return 0; 3615 } 3616 3617 int f2fs_precache_extents(struct inode *inode) 3618 { 3619 struct f2fs_inode_info *fi = F2FS_I(inode); 3620 struct f2fs_map_blocks map; 3621 pgoff_t m_next_extent; 3622 loff_t end; 3623 int err; 3624 3625 if (is_inode_flag_set(inode, FI_NO_EXTENT)) 3626 return -EOPNOTSUPP; 3627 3628 map.m_lblk = 0; 3629 map.m_pblk = 0; 3630 map.m_next_pgofs = NULL; 3631 map.m_next_extent = &m_next_extent; 3632 map.m_seg_type = NO_CHECK_TYPE; 3633 map.m_may_create = false; 3634 end = F2FS_BLK_ALIGN(i_size_read(inode)); 3635 3636 while (map.m_lblk < end) { 3637 map.m_len = end - map.m_lblk; 3638 3639 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 3640 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE); 3641 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 3642 if (err || !map.m_len) 3643 return err; 3644 3645 map.m_lblk = m_next_extent; 3646 } 3647 3648 return 0; 3649 } 3650 3651 static int f2fs_ioc_precache_extents(struct file *filp) 3652 { 3653 return f2fs_precache_extents(file_inode(filp)); 3654 } 3655 3656 static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) 3657 { 3658 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); 3659 __u64 block_count; 3660 3661 if (!capable(CAP_SYS_ADMIN)) 3662 return -EPERM; 3663 3664 if (f2fs_readonly(sbi->sb)) 3665 return -EROFS; 3666 3667 if (copy_from_user(&block_count, (void __user *)arg, 3668 sizeof(block_count))) 3669 return -EFAULT; 3670 3671 return f2fs_resize_fs(filp, block_count); 3672 } 3673 3674 static int f2fs_ioc_enable_verity(struct file *filp, unsigned long 
arg) 3675 { 3676 struct inode *inode = file_inode(filp); 3677 3678 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 3679 3680 if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) { 3681 f2fs_warn(F2FS_I_SB(inode), 3682 "Can't enable fs-verity on inode %llu: the verity feature is not enabled on this filesystem", 3683 inode->i_ino); 3684 return -EOPNOTSUPP; 3685 } 3686 3687 return fsverity_ioctl_enable(filp, (const void __user *)arg); 3688 } 3689 3690 static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg) 3691 { 3692 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) 3693 return -EOPNOTSUPP; 3694 3695 return fsverity_ioctl_measure(filp, (void __user *)arg); 3696 } 3697 3698 static int f2fs_ioc_read_verity_metadata(struct file *filp, unsigned long arg) 3699 { 3700 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) 3701 return -EOPNOTSUPP; 3702 3703 return fsverity_ioctl_read_metadata(filp, (const void __user *)arg); 3704 } 3705 3706 static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg) 3707 { 3708 struct inode *inode = file_inode(filp); 3709 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3710 char *vbuf; 3711 int count; 3712 int err = 0; 3713 3714 vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL); 3715 if (!vbuf) 3716 return -ENOMEM; 3717 3718 f2fs_down_read(&sbi->sb_lock); 3719 count = utf16s_to_utf8s(sbi->raw_super->volume_name, 3720 ARRAY_SIZE(sbi->raw_super->volume_name), 3721 UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME); 3722 f2fs_up_read(&sbi->sb_lock); 3723 3724 if (copy_to_user((char __user *)arg, vbuf, 3725 min(FSLABEL_MAX, count))) 3726 err = -EFAULT; 3727 3728 kfree(vbuf); 3729 return err; 3730 } 3731 3732 static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) 3733 { 3734 struct inode *inode = file_inode(filp); 3735 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3736 char *vbuf; 3737 int err = 0; 3738 3739 if (!capable(CAP_SYS_ADMIN)) 3740 return -EPERM; 3741 3742 vbuf = strndup_user((const char __user *)arg, 
/*
 * Read the inode's i_compr_blocks counter into *@blocks.
 *
 * Returns 0 on success, -EOPNOTSUPP when the superblock lacks the
 * compression feature, or -EINVAL when the inode is not a compressed file.
 * *@blocks is only written on success.
 */
static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks)
{
	if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
		return -EOPNOTSUPP;

	if (!f2fs_compressed_file(inode))
		return -EINVAL;

	*blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);

	return 0;
}

/*
 * F2FS_IOC_GET_COMPRESS_BLOCKS: report the value obtained from
 * f2fs_get_compress_blocks() as a u64 at the user address in @arg.
 * A negative result from the helper is returned unchanged.
 */
static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	__u64 blocks;
	int ret;

	ret = f2fs_get_compress_blocks(inode, &blocks);
	if (ret < 0)
		return ret;

	return put_user(blocks, (u64 __user *)arg);
}

/*
 * Process @count block slots of the dnode in @dn, starting at
 * dn->ofs_in_node, one cluster (i_cluster_size slots) at a time.
 *
 * A first pass validates every valid block address in the range and bails
 * out with -EFSCORRUPTED before anything is modified.  The second pass then
 * handles each cluster whose first slot is COMPRESS_ADDR: NEW_ADDR slots are
 * reset to NULL_ADDR, and (cluster_size - compr_blocks) blocks are dropped
 * from the valid block count.  Clusters not starting with COMPRESS_ADDR are
 * skipped.
 *
 * dn->ofs_in_node is advanced as the range is walked.
 *
 * Returns the number of blocks released, or -EFSCORRUPTED.
 */
static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	unsigned int released_blocks = 0;
	int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
	block_t blkaddr;
	int i;

	/* pass 1: sanity-check all addresses before touching anything */
	for (i = 0; i < count; i++) {
		blkaddr = data_blkaddr(dn->inode, dn->node_folio,
						dn->ofs_in_node + i);

		if (!__is_valid_data_blkaddr(blkaddr))
			continue;
		if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
					DATA_GENERIC_ENHANCE)))
			return -EFSCORRUPTED;
	}

	/* pass 2: release cluster by cluster */
	while (count) {
		int compr_blocks = 0;

		for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
			blkaddr = f2fs_data_blkaddr(dn);

			if (i == 0) {
				if (blkaddr == COMPRESS_ADDR)
					continue;
				/* not a compressed cluster: step over it */
				dn->ofs_in_node += cluster_size;
				goto next;
			}

			if (__is_valid_data_blkaddr(blkaddr))
				compr_blocks++;

			if (blkaddr != NEW_ADDR)
				continue;

			f2fs_set_data_blkaddr(dn, NULL_ADDR);
		}

		f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false);
		dec_valid_block_count(sbi, dn->inode,
					cluster_size - compr_blocks);

		released_blocks += cluster_size - compr_blocks;
next:
		count -= cluster_size;
	}

	return released_blocks;
}
/*
 * F2FS_IOC_RELEASE_COMPRESS_BLOCKS handler.
 *
 * Marks the inode FI_COMPRESS_RELEASED and walks every dnode covering the
 * file, calling release_compress_blocks() on each.  On success the total
 * number of released blocks is written as a u64 to the user address in @arg.
 *
 * Errors returned before any block is released:
 *   -EOPNOTSUPP  superblock lacks the compression feature
 *   -EROFS       filesystem is read-only
 *   -EBUSY       i_writecount shows a writer other than (at most) @filp
 *   -EINVAL      not a compressed file, or already FI_COMPRESS_RELEASED
 *   -EPERM       i_compr_blocks is zero
 * plus any error from mnt_want_write_file() or the writeback flush.
 *
 * If the walk fails after some blocks were released and i_compr_blocks is
 * still non-zero, SBI_NEED_FSCK is set and a warning is logged.
 */
static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_lock_context lc;
	pgoff_t page_idx = 0, last_idx;
	unsigned int released_blocks = 0;
	int ret;
	int writecount;

	if (!f2fs_sb_has_compression(sbi))
		return -EOPNOTSUPP;

	if (f2fs_readonly(sbi->sb))
		return -EROFS;

	ret = mnt_want_write_file(filp);
	if (ret)
		return ret;

	f2fs_balance_fs(sbi, true);

	inode_lock(inode);

	/*
	 * Allow exactly one writer when @filp itself is open for write,
	 * and no writer at all otherwise.
	 */
	writecount = atomic_read(&inode->i_writecount);
	if ((filp->f_mode & FMODE_WRITE && writecount != 1) ||
			(!(filp->f_mode & FMODE_WRITE) && writecount)) {
		ret = -EBUSY;
		goto out;
	}

	if (!f2fs_compressed_file(inode) ||
		is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
		ret = -EINVAL;
		goto out;
	}

	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
	if (ret)
		goto out;

	if (!atomic_read(&fi->i_compr_blocks)) {
		ret = -EPERM;
		goto out;
	}

	set_inode_flag(inode, FI_COMPRESS_RELEASED);
	inode_set_ctime_current(inode);
	f2fs_mark_inode_dirty_sync(inode, true);

	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(inode->i_mapping);

	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	while (page_idx < last_idx) {
		struct dnode_of_data dn;
		pgoff_t end_offset, count;

		f2fs_lock_op(sbi, &lc);

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
		if (ret) {
			f2fs_unlock_op(sbi, &lc);
			if (ret == -ENOENT) {
				/* no dnode here: jump to the next offset */
				page_idx = f2fs_get_next_page_offset(&dn,
								page_idx);
				ret = 0;
				continue;
			}
			break;
		}

		/* clamp to this dnode and to EOF, then round to whole clusters */
		end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
		count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
		count = round_up(count, fi->i_cluster_size);

		ret = release_compress_blocks(&dn, count);

		f2fs_put_dnode(&dn);

		f2fs_unlock_op(sbi, &lc);

		if (ret < 0)
			break;

		page_idx += count;
		released_blocks += ret;
	}

	filemap_invalidate_unlock(inode->i_mapping);
	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
out:
	if (released_blocks)
		f2fs_update_time(sbi, REQ_TIME);
	inode_unlock(inode);

	mnt_drop_write_file(filp);

	/* ret may hold the last positive release count here, hence ">= 0" */
	if (ret >= 0) {
		ret = put_user(released_blocks, (u64 __user *)arg);
	} else if (released_blocks &&
			atomic_read(&fi->i_compr_blocks)) {
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		f2fs_warn(sbi, "%s: partial blocks were released i_ino=%llx "
			"iblocks=%llu, released=%u, compr_blocks=%u, "
			"run fsck to fix.",
			__func__, inode->i_ino, inode->i_blocks,
			released_blocks,
			atomic_read(&fi->i_compr_blocks));
	}

	return ret;
}
/*
 * Process @count block slots of the dnode in @dn, starting at
 * dn->ofs_in_node, one cluster (i_cluster_size slots) at a time, and
 * re-reserve blocks for clusters whose first slot is COMPRESS_ADDR.
 *
 * A first pass validates every valid block address in the range and returns
 * -EFSCORRUPTED before anything is modified.  For each compressed cluster
 * the second pass counts NEW_ADDR slots (already reserved) and valid data
 * slots, reserves the remainder through inc_valid_block_count(), turns
 * NULL_ADDR slots into NEW_ADDR, and adds the reserved amount to
 * *@reserved_blocks.
 *
 * dn->ofs_in_node is advanced as the range is walked.
 *
 * Returns 0 on success, -EFSCORRUPTED, or the inc_valid_block_count() error.
 */
static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count,
		unsigned int *reserved_blocks)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
	block_t blkaddr;
	int i;

	/* pass 1: sanity-check all addresses before touching anything */
	for (i = 0; i < count; i++) {
		blkaddr = data_blkaddr(dn->inode, dn->node_folio,
						dn->ofs_in_node + i);

		if (!__is_valid_data_blkaddr(blkaddr))
			continue;
		if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
					DATA_GENERIC_ENHANCE)))
			return -EFSCORRUPTED;
	}

	/* pass 2: reserve cluster by cluster */
	while (count) {
		int compr_blocks = 0;
		blkcnt_t reserved = 0;
		blkcnt_t to_reserved;
		int ret;

		/* classify the slots without moving dn->ofs_in_node */
		for (i = 0; i < cluster_size; i++) {
			blkaddr = data_blkaddr(dn->inode, dn->node_folio,
						dn->ofs_in_node + i);

			if (i == 0) {
				if (blkaddr != COMPRESS_ADDR) {
					/* not a compressed cluster: skip it */
					dn->ofs_in_node += cluster_size;
					goto next;
				}
				continue;
			}

			/*
			 * A cluster may still hold NEW_ADDR slots if an
			 * earlier release_compress_blocks() failed before
			 * releasing it, so count those as already reserved.
			 */
			if (blkaddr == NEW_ADDR) {
				reserved++;
				continue;
			}
			if (__is_valid_data_blkaddr(blkaddr)) {
				compr_blocks++;
				continue;
			}
		}

		to_reserved = cluster_size - compr_blocks - reserved;

		/* for the case all blocks in cluster were reserved */
		if (reserved && to_reserved == 1) {
			dn->ofs_in_node += cluster_size;
			goto next;
		}

		ret = inc_valid_block_count(sbi, dn->inode,
						&to_reserved, false);
		if (unlikely(ret))
			return ret;

		for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
			if (f2fs_data_blkaddr(dn) == NULL_ADDR)
				f2fs_set_data_blkaddr(dn, NEW_ADDR);
		}

		f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);

		*reserved_blocks += to_reserved;
next:
		count -= cluster_size;
	}

	return 0;
}
/*
 * F2FS_IOC_RESERVE_COMPRESS_BLOCKS handler.
 *
 * For a compressed file that is currently FI_COMPRESS_RELEASED, walks every
 * dnode covering the file and calls reserve_compress_blocks() on each.  When
 * the whole walk succeeds the FI_COMPRESS_RELEASED flag is cleared.  On
 * success the number of reserved blocks is written as a u64 to the user
 * address in @arg.
 *
 * Returns -EOPNOTSUPP without the compression feature, -EROFS on a
 * read-only filesystem, -EINVAL when the file is not compressed or not in
 * the released state, or an error from the walk.  If i_compr_blocks is
 * already non-zero the walk is skipped and 0 blocks are reported.
 *
 * If the walk fails after some blocks were reserved and i_compr_blocks is
 * non-zero, SBI_NEED_FSCK is set and a warning is logged.
 */
static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	pgoff_t page_idx = 0, last_idx;
	unsigned int reserved_blocks = 0;
	int ret;

	if (!f2fs_sb_has_compression(sbi))
		return -EOPNOTSUPP;

	if (f2fs_readonly(sbi->sb))
		return -EROFS;

	ret = mnt_want_write_file(filp);
	if (ret)
		return ret;

	f2fs_balance_fs(sbi, true);

	inode_lock(inode);

	if (!f2fs_compressed_file(inode) ||
		!is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
		ret = -EINVAL;
		goto unlock_inode;
	}

	/* ret is 0 here, so this path reports zero reserved blocks */
	if (atomic_read(&fi->i_compr_blocks))
		goto unlock_inode;

	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(inode->i_mapping);

	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	while (page_idx < last_idx) {
		struct dnode_of_data dn;
		struct f2fs_lock_context lc;
		pgoff_t end_offset, count;

		f2fs_lock_op(sbi, &lc);

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
		if (ret) {
			f2fs_unlock_op(sbi, &lc);
			if (ret == -ENOENT) {
				/* no dnode here: jump to the next offset */
				page_idx = f2fs_get_next_page_offset(&dn,
								page_idx);
				ret = 0;
				continue;
			}
			break;
		}

		/* clamp to this dnode and to EOF, then round to whole clusters */
		end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
		count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
		count = round_up(count, fi->i_cluster_size);

		ret = reserve_compress_blocks(&dn, count, &reserved_blocks);

		f2fs_put_dnode(&dn);

		f2fs_unlock_op(sbi, &lc);

		if (ret < 0)
			break;

		page_idx += count;
	}

	filemap_invalidate_unlock(inode->i_mapping);
	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);

	/* only leave the released state when every cluster was handled */
	if (!ret) {
		clear_inode_flag(inode, FI_COMPRESS_RELEASED);
		inode_set_ctime_current(inode);
		f2fs_mark_inode_dirty_sync(inode, true);
	}
unlock_inode:
	if (reserved_blocks)
		f2fs_update_time(sbi, REQ_TIME);
	inode_unlock(inode);
	mnt_drop_write_file(filp);

	if (!ret) {
		ret = put_user(reserved_blocks, (u64 __user *)arg);
	} else if (reserved_blocks &&
			atomic_read(&fi->i_compr_blocks)) {
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%llx "
			"iblocks=%llu, reserved=%u, compr_blocks=%u, "
			"run fsck to fix.",
			__func__, inode->i_ino, inode->i_blocks,
			reserved_blocks,
			atomic_read(&fi->i_compr_blocks));
	}

	return ret;
}
__user *)arg); 4133 } else if (reserved_blocks && 4134 atomic_read(&fi->i_compr_blocks)) { 4135 set_sbi_flag(sbi, SBI_NEED_FSCK); 4136 f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%llx " 4137 "iblocks=%llu, reserved=%u, compr_blocks=%u, " 4138 "run fsck to fix.", 4139 __func__, inode->i_ino, inode->i_blocks, 4140 reserved_blocks, 4141 atomic_read(&fi->i_compr_blocks)); 4142 } 4143 4144 return ret; 4145 } 4146 4147 static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode, 4148 pgoff_t off, block_t block, block_t len, u32 flags) 4149 { 4150 sector_t sector = SECTOR_FROM_BLOCK(block); 4151 sector_t nr_sects = SECTOR_FROM_BLOCK(len); 4152 int ret = 0; 4153 4154 if (flags & F2FS_TRIM_FILE_DISCARD) { 4155 if (bdev_max_secure_erase_sectors(bdev)) 4156 ret = blkdev_issue_secure_erase(bdev, sector, nr_sects, 4157 GFP_NOFS); 4158 else 4159 ret = blkdev_issue_discard(bdev, sector, nr_sects, 4160 GFP_NOFS); 4161 } 4162 4163 if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) { 4164 if (IS_ENCRYPTED(inode)) 4165 ret = fscrypt_zeroout_range(inode, 4166 (loff_t)off << inode->i_blkbits, sector, 4167 (u64)len << inode->i_blkbits); 4168 else 4169 ret = blkdev_issue_zeroout(bdev, sector, nr_sects, 4170 GFP_NOFS, 0); 4171 } 4172 4173 return ret; 4174 } 4175 4176 static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) 4177 { 4178 struct inode *inode = file_inode(filp); 4179 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4180 struct address_space *mapping = inode->i_mapping; 4181 struct block_device *prev_bdev = NULL; 4182 struct f2fs_sectrim_range range; 4183 pgoff_t index, pg_end, prev_index = 0; 4184 block_t prev_block = 0, len = 0; 4185 loff_t end_addr; 4186 bool to_end = false; 4187 int ret = 0; 4188 4189 if (!(filp->f_mode & FMODE_WRITE)) 4190 return -EBADF; 4191 4192 if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg, 4193 sizeof(range))) 4194 return -EFAULT; 4195 4196 if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) || 
4197 !S_ISREG(inode->i_mode)) 4198 return -EINVAL; 4199 4200 if (((range.flags & F2FS_TRIM_FILE_DISCARD) && 4201 !f2fs_hw_support_discard(sbi)) || 4202 ((range.flags & F2FS_TRIM_FILE_ZEROOUT) && 4203 IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi))) 4204 return -EOPNOTSUPP; 4205 4206 ret = mnt_want_write_file(filp); 4207 if (ret) 4208 return ret; 4209 inode_lock(inode); 4210 4211 if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) || 4212 range.start >= inode->i_size) { 4213 ret = -EINVAL; 4214 goto err; 4215 } 4216 4217 if (range.len == 0) 4218 goto err; 4219 4220 if (inode->i_size - range.start > range.len) { 4221 end_addr = range.start + range.len; 4222 } else { 4223 end_addr = range.len == (u64)-1 ? 4224 sbi->sb->s_maxbytes : inode->i_size; 4225 to_end = true; 4226 } 4227 4228 if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) || 4229 (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) { 4230 ret = -EINVAL; 4231 goto err; 4232 } 4233 4234 index = F2FS_BYTES_TO_BLK(range.start); 4235 pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE); 4236 4237 ret = f2fs_convert_inline_inode(inode); 4238 if (ret) 4239 goto err; 4240 4241 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 4242 filemap_invalidate_lock(mapping); 4243 4244 ret = filemap_write_and_wait_range(mapping, range.start, 4245 to_end ? LLONG_MAX : end_addr - 1); 4246 if (ret) 4247 goto out; 4248 4249 truncate_inode_pages_range(mapping, range.start, 4250 to_end ? 
-1 : end_addr - 1); 4251 4252 while (index < pg_end) { 4253 struct dnode_of_data dn; 4254 pgoff_t end_offset, count; 4255 int i; 4256 4257 set_new_dnode(&dn, inode, NULL, NULL, 0); 4258 ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); 4259 if (ret) { 4260 if (ret == -ENOENT) { 4261 index = f2fs_get_next_page_offset(&dn, index); 4262 continue; 4263 } 4264 goto out; 4265 } 4266 4267 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 4268 count = min(end_offset - dn.ofs_in_node, pg_end - index); 4269 for (i = 0; i < count; i++, index++, dn.ofs_in_node++) { 4270 struct block_device *cur_bdev; 4271 block_t blkaddr = f2fs_data_blkaddr(&dn); 4272 4273 if (!__is_valid_data_blkaddr(blkaddr)) 4274 continue; 4275 4276 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, 4277 DATA_GENERIC_ENHANCE)) { 4278 ret = -EFSCORRUPTED; 4279 f2fs_put_dnode(&dn); 4280 goto out; 4281 } 4282 4283 cur_bdev = f2fs_target_device(sbi, blkaddr, NULL); 4284 if (f2fs_is_multi_device(sbi)) { 4285 int di = f2fs_target_device_index(sbi, blkaddr); 4286 4287 blkaddr -= FDEV(di).start_blk; 4288 } 4289 4290 if (len) { 4291 if (prev_bdev == cur_bdev && 4292 index == prev_index + len && 4293 blkaddr == prev_block + len) { 4294 len++; 4295 } else { 4296 ret = f2fs_secure_erase(prev_bdev, 4297 inode, prev_index, prev_block, 4298 len, range.flags); 4299 if (ret) { 4300 f2fs_put_dnode(&dn); 4301 goto out; 4302 } 4303 4304 len = 0; 4305 } 4306 } 4307 4308 if (!len) { 4309 prev_bdev = cur_bdev; 4310 prev_index = index; 4311 prev_block = blkaddr; 4312 len = 1; 4313 } 4314 } 4315 4316 f2fs_put_dnode(&dn); 4317 4318 if (fatal_signal_pending(current)) { 4319 ret = -EINTR; 4320 goto out; 4321 } 4322 cond_resched(); 4323 } 4324 4325 if (len) 4326 ret = f2fs_secure_erase(prev_bdev, inode, prev_index, 4327 prev_block, len, range.flags); 4328 f2fs_update_time(sbi, REQ_TIME); 4329 out: 4330 filemap_invalidate_unlock(mapping); 4331 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 4332 err: 4333 inode_unlock(inode); 4334 
mnt_drop_write_file(filp); 4335 4336 return ret; 4337 } 4338 4339 static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg) 4340 { 4341 struct inode *inode = file_inode(filp); 4342 struct f2fs_comp_option option; 4343 4344 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 4345 return -EOPNOTSUPP; 4346 4347 inode_lock_shared(inode); 4348 4349 if (!f2fs_compressed_file(inode)) { 4350 inode_unlock_shared(inode); 4351 return -ENODATA; 4352 } 4353 4354 option.algorithm = F2FS_I(inode)->i_compress_algorithm; 4355 option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size; 4356 4357 inode_unlock_shared(inode); 4358 4359 if (copy_to_user((struct f2fs_comp_option __user *)arg, &option, 4360 sizeof(option))) 4361 return -EFAULT; 4362 4363 return 0; 4364 } 4365 4366 static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) 4367 { 4368 struct inode *inode = file_inode(filp); 4369 struct f2fs_inode_info *fi = F2FS_I(inode); 4370 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4371 struct f2fs_comp_option option; 4372 int ret = 0; 4373 4374 if (!f2fs_sb_has_compression(sbi)) 4375 return -EOPNOTSUPP; 4376 4377 if (!(filp->f_mode & FMODE_WRITE)) 4378 return -EBADF; 4379 4380 if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg, 4381 sizeof(option))) 4382 return -EFAULT; 4383 4384 if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || 4385 option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || 4386 option.algorithm >= COMPRESS_MAX) 4387 return -EINVAL; 4388 4389 ret = mnt_want_write_file(filp); 4390 if (ret) 4391 return ret; 4392 inode_lock(inode); 4393 4394 f2fs_down_write(&F2FS_I(inode)->i_sem); 4395 if (!f2fs_compressed_file(inode)) { 4396 ret = -EINVAL; 4397 goto out; 4398 } 4399 4400 if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { 4401 ret = -EBUSY; 4402 goto out; 4403 } 4404 4405 if (F2FS_HAS_BLOCKS(inode)) { 4406 ret = -EFBIG; 4407 goto out; 4408 } 4409 4410 fi->i_compress_algorithm = option.algorithm; 4411 
fi->i_log_cluster_size = option.log_cluster_size; 4412 fi->i_cluster_size = BIT(option.log_cluster_size); 4413 /* Set default level */ 4414 if (fi->i_compress_algorithm == COMPRESS_ZSTD) 4415 fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; 4416 else 4417 fi->i_compress_level = 0; 4418 /* Adjust mount option level */ 4419 if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm && 4420 F2FS_OPTION(sbi).compress_level) 4421 fi->i_compress_level = F2FS_OPTION(sbi).compress_level; 4422 f2fs_mark_inode_dirty_sync(inode, true); 4423 4424 if (!f2fs_is_compress_backend_ready(inode)) 4425 f2fs_warn(sbi, "compression algorithm is successfully set, " 4426 "but current kernel doesn't support this algorithm."); 4427 out: 4428 f2fs_up_write(&fi->i_sem); 4429 inode_unlock(inode); 4430 mnt_drop_write_file(filp); 4431 4432 return ret; 4433 } 4434 4435 static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) 4436 { 4437 DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx); 4438 struct address_space *mapping = inode->i_mapping; 4439 struct folio *folio; 4440 pgoff_t redirty_idx = page_idx; 4441 int page_len = 0, ret = 0; 4442 4443 filemap_invalidate_lock_shared(mapping); 4444 page_cache_ra_unbounded(&ractl, len, 0); 4445 filemap_invalidate_unlock_shared(mapping); 4446 4447 do { 4448 folio = read_cache_folio(mapping, page_idx, NULL, NULL); 4449 if (IS_ERR(folio)) { 4450 ret = PTR_ERR(folio); 4451 break; 4452 } 4453 page_len += folio_nr_pages(folio) - (page_idx - folio->index); 4454 page_idx = folio_next_index(folio); 4455 } while (page_len < len); 4456 4457 do { 4458 folio = filemap_lock_folio(mapping, redirty_idx); 4459 4460 /* It will never fail, when folio has pinned above */ 4461 f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(folio)); 4462 4463 f2fs_folio_wait_writeback(folio, DATA, true, true); 4464 4465 folio_mark_dirty(folio); 4466 folio_set_f2fs_gcing(folio); 4467 redirty_idx = folio_next_index(folio); 4468 folio_unlock(folio); 4469 folio_put_refs(folio, 
2); 4470 } while (redirty_idx < page_idx); 4471 4472 return ret; 4473 } 4474 4475 static int f2fs_ioc_decompress_file(struct file *filp) 4476 { 4477 struct inode *inode = file_inode(filp); 4478 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4479 struct f2fs_inode_info *fi = F2FS_I(inode); 4480 pgoff_t page_idx = 0, last_idx, cluster_idx; 4481 int ret; 4482 4483 if (!f2fs_sb_has_compression(sbi) || 4484 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) 4485 return -EOPNOTSUPP; 4486 4487 if (!(filp->f_mode & FMODE_WRITE)) 4488 return -EBADF; 4489 4490 f2fs_balance_fs(sbi, true); 4491 4492 ret = mnt_want_write_file(filp); 4493 if (ret) 4494 return ret; 4495 inode_lock(inode); 4496 4497 if (!f2fs_is_compress_backend_ready(inode)) { 4498 ret = -EOPNOTSUPP; 4499 goto out; 4500 } 4501 4502 if (!f2fs_compressed_file(inode) || 4503 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4504 ret = -EINVAL; 4505 goto out; 4506 } 4507 4508 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 4509 if (ret) 4510 goto out; 4511 4512 if (!atomic_read(&fi->i_compr_blocks)) 4513 goto out; 4514 4515 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4516 last_idx >>= fi->i_log_cluster_size; 4517 4518 for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { 4519 page_idx = cluster_idx << fi->i_log_cluster_size; 4520 4521 if (!f2fs_is_compressed_cluster(inode, page_idx)) 4522 continue; 4523 4524 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); 4525 if (ret < 0) 4526 break; 4527 4528 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { 4529 ret = filemap_fdatawrite(inode->i_mapping); 4530 if (ret < 0) 4531 break; 4532 } 4533 4534 cond_resched(); 4535 if (fatal_signal_pending(current)) { 4536 ret = -EINTR; 4537 break; 4538 } 4539 } 4540 4541 if (!ret) 4542 ret = filemap_write_and_wait_range(inode->i_mapping, 0, 4543 LLONG_MAX); 4544 4545 if (ret) 4546 f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). 
Please delete the file.", 4547 __func__, ret); 4548 f2fs_update_time(sbi, REQ_TIME); 4549 out: 4550 inode_unlock(inode); 4551 mnt_drop_write_file(filp); 4552 4553 return ret; 4554 } 4555 4556 static int f2fs_ioc_compress_file(struct file *filp) 4557 { 4558 struct inode *inode = file_inode(filp); 4559 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4560 struct f2fs_inode_info *fi = F2FS_I(inode); 4561 pgoff_t page_idx = 0, last_idx, cluster_idx; 4562 int ret; 4563 4564 if (!f2fs_sb_has_compression(sbi) || 4565 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) 4566 return -EOPNOTSUPP; 4567 4568 if (!(filp->f_mode & FMODE_WRITE)) 4569 return -EBADF; 4570 4571 f2fs_balance_fs(sbi, true); 4572 4573 ret = mnt_want_write_file(filp); 4574 if (ret) 4575 return ret; 4576 inode_lock(inode); 4577 4578 if (!f2fs_is_compress_backend_ready(inode)) { 4579 ret = -EOPNOTSUPP; 4580 goto out; 4581 } 4582 4583 if (!f2fs_compressed_file(inode) || 4584 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4585 ret = -EINVAL; 4586 goto out; 4587 } 4588 4589 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 4590 if (ret) 4591 goto out; 4592 4593 set_inode_flag(inode, FI_ENABLE_COMPRESS); 4594 4595 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4596 last_idx >>= fi->i_log_cluster_size; 4597 4598 for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { 4599 page_idx = cluster_idx << fi->i_log_cluster_size; 4600 4601 if (f2fs_is_sparse_cluster(inode, page_idx)) 4602 continue; 4603 4604 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); 4605 if (ret < 0) 4606 break; 4607 4608 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { 4609 ret = filemap_fdatawrite(inode->i_mapping); 4610 if (ret < 0) 4611 break; 4612 } 4613 4614 cond_resched(); 4615 if (fatal_signal_pending(current)) { 4616 ret = -EINTR; 4617 break; 4618 } 4619 } 4620 4621 if (!ret) 4622 ret = filemap_write_and_wait_range(inode->i_mapping, 0, 4623 LLONG_MAX); 4624 4625 clear_inode_flag(inode, 
/*
 * Dispatch an ioctl command to its handler.
 *
 * Each case forwards @filp (and @arg where the handler takes one) to the
 * matching f2fs_ioc_*() function and returns its result.  The two volatile
 * write commands are recognised but always answer -EOPNOTSUPP; any command
 * not listed here yields -ENOTTY.
 */
static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case FS_IOC_GETVERSION:
		return f2fs_ioc_getversion(filp, arg);
	/* atomic write: "replace" differs only in the boolean argument */
	case F2FS_IOC_START_ATOMIC_WRITE:
		return f2fs_ioc_start_atomic_write(filp, false);
	case F2FS_IOC_START_ATOMIC_REPLACE:
		return f2fs_ioc_start_atomic_write(filp, true);
	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
		return f2fs_ioc_commit_atomic_write(filp);
	case F2FS_IOC_ABORT_ATOMIC_WRITE:
		return f2fs_ioc_abort_atomic_write(filp);
	/* volatile writes are not supported */
	case F2FS_IOC_START_VOLATILE_WRITE:
	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
		return -EOPNOTSUPP;
	case F2FS_IOC_SHUTDOWN:
		return f2fs_ioc_shutdown(filp, arg);
	case FITRIM:
		return f2fs_ioc_fitrim(filp, arg);
	/* encryption policy and key management */
	case FS_IOC_SET_ENCRYPTION_POLICY:
		return f2fs_ioc_set_encryption_policy(filp, arg);
	case FS_IOC_GET_ENCRYPTION_POLICY:
		return f2fs_ioc_get_encryption_policy(filp, arg);
	case FS_IOC_GET_ENCRYPTION_PWSALT:
		return f2fs_ioc_get_encryption_pwsalt(filp, arg);
	case FS_IOC_GET_ENCRYPTION_POLICY_EX:
		return f2fs_ioc_get_encryption_policy_ex(filp, arg);
	case FS_IOC_ADD_ENCRYPTION_KEY:
		return f2fs_ioc_add_encryption_key(filp, arg);
	case FS_IOC_REMOVE_ENCRYPTION_KEY:
		return f2fs_ioc_remove_encryption_key(filp, arg);
	case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
		return f2fs_ioc_remove_encryption_key_all_users(filp, arg);
	case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
		return f2fs_ioc_get_encryption_key_status(filp, arg);
	case FS_IOC_GET_ENCRYPTION_NONCE:
		return f2fs_ioc_get_encryption_nonce(filp, arg);
	/* garbage collection, checkpointing and data movement */
	case F2FS_IOC_GARBAGE_COLLECT:
		return f2fs_ioc_gc(filp, arg);
	case F2FS_IOC_GARBAGE_COLLECT_RANGE:
		return f2fs_ioc_gc_range(filp, arg);
	case F2FS_IOC_WRITE_CHECKPOINT:
		return f2fs_ioc_write_checkpoint(filp);
	case F2FS_IOC_DEFRAGMENT:
		return f2fs_ioc_defragment(filp, arg);
	case F2FS_IOC_MOVE_RANGE:
		return f2fs_ioc_move_range(filp, arg);
	case F2FS_IOC_FLUSH_DEVICE:
		return f2fs_ioc_flush_device(filp, arg);
	case F2FS_IOC_GET_FEATURES:
		return f2fs_ioc_get_features(filp, arg);
	case F2FS_IOC_GET_PIN_FILE:
		return f2fs_ioc_get_pin_file(filp, arg);
	case F2FS_IOC_SET_PIN_FILE:
		return f2fs_ioc_set_pin_file(filp, arg);
	case F2FS_IOC_PRECACHE_EXTENTS:
		return f2fs_ioc_precache_extents(filp);
	case F2FS_IOC_RESIZE_FS:
		return f2fs_ioc_resize_fs(filp, arg);
	/* fs-verity */
	case FS_IOC_ENABLE_VERITY:
		return f2fs_ioc_enable_verity(filp, arg);
	case FS_IOC_MEASURE_VERITY:
		return f2fs_ioc_measure_verity(filp, arg);
	case FS_IOC_READ_VERITY_METADATA:
		return f2fs_ioc_read_verity_metadata(filp, arg);
	/* volume label */
	case FS_IOC_GETFSLABEL:
		return f2fs_ioc_getfslabel(filp, arg);
	case FS_IOC_SETFSLABEL:
		return f2fs_ioc_setfslabel(filp, arg);
	/* compression */
	case F2FS_IOC_GET_COMPRESS_BLOCKS:
		return f2fs_ioc_get_compress_blocks(filp, arg);
	case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
		return f2fs_release_compress_blocks(filp, arg);
	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
		return f2fs_reserve_compress_blocks(filp, arg);
	case F2FS_IOC_SEC_TRIM_FILE:
		return f2fs_sec_trim_file(filp, arg);
	case F2FS_IOC_GET_COMPRESS_OPTION:
		return f2fs_ioc_get_compress_option(filp, arg);
	case F2FS_IOC_SET_COMPRESS_OPTION:
		return f2fs_ioc_set_compress_option(filp, arg);
	case F2FS_IOC_DECOMPRESS_FILE:
		return f2fs_ioc_decompress_file(filp);
	case F2FS_IOC_COMPRESS_FILE:
		return f2fs_ioc_compress_file(filp);
	case F2FS_IOC_GET_DEV_ALIAS_FILE:
		return f2fs_ioc_get_dev_alias_file(filp, arg);
	case F2FS_IOC_IO_PRIO:
		return f2fs_ioc_io_prio(filp, arg);
	default:
		return -ENOTTY;
	}
}
return f2fs_ioc_get_dev_alias_file(filp, arg); 4726 case F2FS_IOC_IO_PRIO: 4727 return f2fs_ioc_io_prio(filp, arg); 4728 default: 4729 return -ENOTTY; 4730 } 4731 } 4732 4733 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 4734 { 4735 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) 4736 return -EIO; 4737 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp)))) 4738 return -ENOSPC; 4739 4740 return __f2fs_ioctl(filp, cmd, arg); 4741 } 4742 4743 /* 4744 * Return %true if the given read or write request should use direct I/O, or 4745 * %false if it should use buffered I/O. 4746 */ 4747 static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb, 4748 struct iov_iter *iter) 4749 { 4750 unsigned int align; 4751 4752 if (!(iocb->ki_flags & IOCB_DIRECT)) 4753 return false; 4754 4755 if (f2fs_force_buffered_io(inode, iov_iter_rw(iter))) 4756 return false; 4757 4758 /* 4759 * Direct I/O not aligned to the disk's logical_block_size will be 4760 * attempted, but will fail with -EINVAL. 4761 * 4762 * f2fs additionally requires that direct I/O be aligned to the 4763 * filesystem block size, which is often a stricter requirement. 4764 * However, f2fs traditionally falls back to buffered I/O on requests 4765 * that are logical_block_size-aligned but not fs-block aligned. 4766 * 4767 * The below logic implements this behavior. 
4768 */ 4769 align = iocb->ki_pos | iov_iter_alignment(iter); 4770 if (!IS_ALIGNED(align, i_blocksize(inode)) && 4771 IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev))) 4772 return false; 4773 4774 return true; 4775 } 4776 4777 static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error, 4778 unsigned int flags) 4779 { 4780 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); 4781 4782 dec_page_count(sbi, F2FS_DIO_READ); 4783 if (error) 4784 return error; 4785 f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size); 4786 return 0; 4787 } 4788 4789 static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = { 4790 .end_io = f2fs_dio_read_end_io, 4791 }; 4792 4793 static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) 4794 { 4795 struct file *file = iocb->ki_filp; 4796 struct inode *inode = file_inode(file); 4797 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4798 struct f2fs_inode_info *fi = F2FS_I(inode); 4799 const loff_t pos = iocb->ki_pos; 4800 const size_t count = iov_iter_count(to); 4801 struct iomap_dio *dio; 4802 ssize_t ret; 4803 4804 if (count == 0) 4805 return 0; /* skip atime update */ 4806 4807 trace_f2fs_direct_IO_enter(inode, iocb, count, READ); 4808 4809 if (iocb->ki_flags & IOCB_NOWAIT) { 4810 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { 4811 ret = -EAGAIN; 4812 goto out; 4813 } 4814 } else { 4815 f2fs_down_read(&fi->i_gc_rwsem[READ]); 4816 } 4817 4818 /* dio is not compatible w/ atomic file */ 4819 if (f2fs_is_atomic_file(inode)) { 4820 f2fs_up_read(&fi->i_gc_rwsem[READ]); 4821 ret = -EOPNOTSUPP; 4822 goto out; 4823 } 4824 4825 /* 4826 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of 4827 * the higher-level function iomap_dio_rw() in order to ensure that the 4828 * F2FS_DIO_READ counter will be decremented correctly in all cases. 
4829 */ 4830 inc_page_count(sbi, F2FS_DIO_READ); 4831 dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops, 4832 &f2fs_iomap_dio_read_ops, 0, NULL, 0); 4833 if (IS_ERR_OR_NULL(dio)) { 4834 ret = PTR_ERR_OR_ZERO(dio); 4835 if (ret != -EIOCBQUEUED) 4836 dec_page_count(sbi, F2FS_DIO_READ); 4837 } else { 4838 ret = iomap_dio_complete(dio); 4839 } 4840 4841 f2fs_up_read(&fi->i_gc_rwsem[READ]); 4842 4843 file_accessed(file); 4844 out: 4845 trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret); 4846 return ret; 4847 } 4848 4849 static void f2fs_trace_rw_file_path(struct file *file, loff_t pos, size_t count, 4850 int rw) 4851 { 4852 struct inode *inode = file_inode(file); 4853 char *buf, *path; 4854 4855 buf = f2fs_getname(F2FS_I_SB(inode)); 4856 if (!buf) 4857 return; 4858 path = dentry_path_raw(file_dentry(file), buf, PATH_MAX); 4859 if (IS_ERR(path)) 4860 goto free_buf; 4861 if (rw == WRITE) 4862 trace_f2fs_datawrite_start(inode, pos, count, 4863 current->pid, path, current->comm); 4864 else 4865 trace_f2fs_dataread_start(inode, pos, count, 4866 current->pid, path, current->comm); 4867 free_buf: 4868 f2fs_putname(buf); 4869 } 4870 4871 static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 4872 { 4873 struct inode *inode = file_inode(iocb->ki_filp); 4874 const loff_t pos = iocb->ki_pos; 4875 ssize_t ret; 4876 bool dio; 4877 4878 if (!f2fs_is_compress_backend_ready(inode)) 4879 return -EOPNOTSUPP; 4880 4881 if (trace_f2fs_dataread_start_enabled()) 4882 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, 4883 iov_iter_count(to), READ); 4884 4885 dio = f2fs_should_use_dio(inode, iocb, to); 4886 4887 /* In LFS mode, if there is inflight dio, wait for its completion */ 4888 if (f2fs_lfs_mode(F2FS_I_SB(inode)) && 4889 get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && 4890 (!f2fs_is_pinned_file(inode) || !dio)) 4891 inode_dio_wait(inode); 4892 4893 if (dio) { 4894 ret = f2fs_dio_read_iter(iocb, to); 4895 } else { 4896 ret = filemap_read(iocb, to, 0); 4897 if 
(ret > 0) 4898 f2fs_update_iostat(F2FS_I_SB(inode), inode, 4899 APP_BUFFERED_READ_IO, ret); 4900 } 4901 trace_f2fs_dataread_end(inode, pos, ret); 4902 return ret; 4903 } 4904 4905 static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos, 4906 struct pipe_inode_info *pipe, 4907 size_t len, unsigned int flags) 4908 { 4909 struct inode *inode = file_inode(in); 4910 const loff_t pos = *ppos; 4911 ssize_t ret; 4912 4913 if (!f2fs_is_compress_backend_ready(inode)) 4914 return -EOPNOTSUPP; 4915 4916 if (trace_f2fs_dataread_start_enabled()) 4917 f2fs_trace_rw_file_path(in, pos, len, READ); 4918 4919 ret = filemap_splice_read(in, ppos, pipe, len, flags); 4920 if (ret > 0) 4921 f2fs_update_iostat(F2FS_I_SB(inode), inode, 4922 APP_BUFFERED_READ_IO, ret); 4923 4924 trace_f2fs_dataread_end(inode, pos, ret); 4925 return ret; 4926 } 4927 4928 static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from) 4929 { 4930 struct file *file = iocb->ki_filp; 4931 struct inode *inode = file_inode(file); 4932 ssize_t count; 4933 int err; 4934 4935 if (IS_IMMUTABLE(inode)) 4936 return -EPERM; 4937 4938 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) 4939 return -EPERM; 4940 4941 count = generic_write_checks(iocb, from); 4942 if (count <= 0) 4943 return count; 4944 4945 err = file_modified(file); 4946 if (err) 4947 return err; 4948 4949 f2fs_zero_post_eof_page(inode, 4950 iocb->ki_pos + iov_iter_count(from), true); 4951 return count; 4952 } 4953 4954 /* 4955 * Preallocate blocks for a write request, if it is possible and helpful to do 4956 * so. Returns a positive number if blocks may have been preallocated, 0 if no 4957 * blocks were preallocated, or a negative errno value if something went 4958 * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the 4959 * requested blocks (not just some of them) have been allocated. 
4960 */ 4961 static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, 4962 bool dio) 4963 { 4964 struct inode *inode = file_inode(iocb->ki_filp); 4965 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4966 const loff_t pos = iocb->ki_pos; 4967 const size_t count = iov_iter_count(iter); 4968 struct f2fs_map_blocks map = {}; 4969 int flag; 4970 int ret; 4971 4972 /* If it will be an out-of-place direct write, don't bother. */ 4973 if (dio && f2fs_lfs_mode(sbi)) 4974 return 0; 4975 /* 4976 * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into 4977 * buffered IO, if DIO meets any holes. 4978 */ 4979 if (dio && i_size_read(inode) && 4980 (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode)))) 4981 return 0; 4982 4983 /* No-wait I/O can't allocate blocks. */ 4984 if (iocb->ki_flags & IOCB_NOWAIT) 4985 return 0; 4986 4987 /* If it will be a short write, don't bother. */ 4988 if (fault_in_iov_iter_readable(iter, count)) 4989 return 0; 4990 4991 if (f2fs_has_inline_data(inode)) { 4992 /* If the data will fit inline, don't bother. */ 4993 if (pos + count <= MAX_INLINE_DATA(inode)) 4994 return 0; 4995 ret = f2fs_convert_inline_inode(inode); 4996 if (ret) 4997 return ret; 4998 } 4999 5000 /* Do not preallocate blocks that will be written partially in 4KB. */ 5001 map.m_lblk = F2FS_BLK_ALIGN(pos); 5002 map.m_len = F2FS_BYTES_TO_BLK(pos + count); 5003 if (map.m_len > map.m_lblk) 5004 map.m_len -= map.m_lblk; 5005 else 5006 return 0; 5007 5008 if (!IS_DEVICE_ALIASING(inode)) 5009 map.m_may_create = true; 5010 if (dio) { 5011 map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, 5012 inode->i_write_hint); 5013 flag = F2FS_GET_BLOCK_PRE_DIO; 5014 } else { 5015 map.m_seg_type = NO_CHECK_TYPE; 5016 flag = F2FS_GET_BLOCK_PRE_AIO; 5017 } 5018 5019 ret = f2fs_map_blocks(inode, &map, flag); 5020 /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. 
*/ 5021 if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0)) 5022 return ret; 5023 if (ret == 0) 5024 set_inode_flag(inode, FI_PREALLOCATED_ALL); 5025 return map.m_len; 5026 } 5027 5028 static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb, 5029 struct iov_iter *from) 5030 { 5031 struct file *file = iocb->ki_filp; 5032 struct inode *inode = file_inode(file); 5033 ssize_t ret; 5034 5035 if (iocb->ki_flags & IOCB_NOWAIT) 5036 return -EOPNOTSUPP; 5037 5038 ret = generic_perform_write(iocb, from); 5039 5040 if (ret > 0) { 5041 f2fs_update_iostat(F2FS_I_SB(inode), inode, 5042 APP_BUFFERED_IO, ret); 5043 } 5044 return ret; 5045 } 5046 5047 static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, 5048 unsigned int flags) 5049 { 5050 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); 5051 5052 dec_page_count(sbi, F2FS_DIO_WRITE); 5053 if (error) 5054 return error; 5055 f2fs_update_time(sbi, REQ_TIME); 5056 f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size); 5057 return 0; 5058 } 5059 5060 static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, 5061 struct bio *bio, loff_t file_offset) 5062 { 5063 struct inode *inode = iter->inode; 5064 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 5065 enum log_type type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint); 5066 enum temp_type temp = f2fs_get_segment_temp(sbi, type); 5067 5068 bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp); 5069 blk_crypto_submit_bio(bio); 5070 } 5071 5072 static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = { 5073 .end_io = f2fs_dio_write_end_io, 5074 .submit_io = f2fs_dio_write_submit_io, 5075 }; 5076 5077 static void f2fs_flush_buffered_write(struct address_space *mapping, 5078 loff_t start_pos, loff_t end_pos) 5079 { 5080 int ret; 5081 5082 ret = filemap_write_and_wait_range(mapping, start_pos, end_pos); 5083 if (ret < 0) 5084 return; 5085 invalidate_mapping_pages(mapping, 5086 start_pos >> PAGE_SHIFT, 5087 
end_pos >> PAGE_SHIFT); 5088 } 5089 5090 static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, 5091 bool *may_need_sync) 5092 { 5093 struct file *file = iocb->ki_filp; 5094 struct inode *inode = file_inode(file); 5095 struct f2fs_inode_info *fi = F2FS_I(inode); 5096 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 5097 const bool do_opu = f2fs_lfs_mode(sbi); 5098 const loff_t pos = iocb->ki_pos; 5099 const ssize_t count = iov_iter_count(from); 5100 unsigned int dio_flags; 5101 struct iomap_dio *dio; 5102 ssize_t ret; 5103 5104 trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE); 5105 5106 if (iocb->ki_flags & IOCB_NOWAIT) { 5107 /* f2fs_convert_inline_inode() and block allocation can block */ 5108 if (f2fs_has_inline_data(inode) || 5109 !f2fs_overwrite_io(inode, pos, count)) { 5110 ret = -EAGAIN; 5111 goto out; 5112 } 5113 5114 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) { 5115 ret = -EAGAIN; 5116 goto out; 5117 } 5118 if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { 5119 f2fs_up_read(&fi->i_gc_rwsem[WRITE]); 5120 ret = -EAGAIN; 5121 goto out; 5122 } 5123 } else { 5124 ret = f2fs_convert_inline_inode(inode); 5125 if (ret) 5126 goto out; 5127 5128 f2fs_down_read(&fi->i_gc_rwsem[WRITE]); 5129 if (do_opu) 5130 f2fs_down_read(&fi->i_gc_rwsem[READ]); 5131 } 5132 5133 /* 5134 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of 5135 * the higher-level function iomap_dio_rw() in order to ensure that the 5136 * F2FS_DIO_WRITE counter will be decremented correctly in all cases. 
5137 */ 5138 inc_page_count(sbi, F2FS_DIO_WRITE); 5139 dio_flags = 0; 5140 if (pos + count > inode->i_size) 5141 dio_flags |= IOMAP_DIO_FORCE_WAIT; 5142 dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops, 5143 &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0); 5144 if (IS_ERR_OR_NULL(dio)) { 5145 ret = PTR_ERR_OR_ZERO(dio); 5146 if (ret == -ENOTBLK) 5147 ret = 0; 5148 if (ret != -EIOCBQUEUED) 5149 dec_page_count(sbi, F2FS_DIO_WRITE); 5150 } else { 5151 ret = iomap_dio_complete(dio); 5152 } 5153 5154 if (do_opu) 5155 f2fs_up_read(&fi->i_gc_rwsem[READ]); 5156 f2fs_up_read(&fi->i_gc_rwsem[WRITE]); 5157 5158 if (ret < 0) 5159 goto out; 5160 if (pos + ret > inode->i_size) 5161 f2fs_i_size_write(inode, pos + ret); 5162 if (!do_opu) 5163 set_inode_flag(inode, FI_UPDATE_WRITE); 5164 5165 if (iov_iter_count(from)) { 5166 ssize_t ret2; 5167 loff_t bufio_start_pos = iocb->ki_pos; 5168 5169 /* 5170 * The direct write was partial, so we need to fall back to a 5171 * buffered write for the remainder. 5172 */ 5173 5174 ret2 = f2fs_buffered_write_iter(iocb, from); 5175 if (iov_iter_count(from)) 5176 f2fs_write_failed(inode, iocb->ki_pos); 5177 if (ret2 < 0) 5178 goto out; 5179 5180 /* 5181 * Ensure that the pagecache pages are written to disk and 5182 * invalidated to preserve the expected O_DIRECT semantics. 5183 */ 5184 if (ret2 > 0) { 5185 loff_t bufio_end_pos = bufio_start_pos + ret2 - 1; 5186 5187 ret += ret2; 5188 5189 f2fs_flush_buffered_write(file->f_mapping, 5190 bufio_start_pos, 5191 bufio_end_pos); 5192 } 5193 } else { 5194 /* iomap_dio_rw() already handled the generic_write_sync(). 
*/ 5195 *may_need_sync = false; 5196 } 5197 out: 5198 trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret); 5199 return ret; 5200 } 5201 5202 static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 5203 { 5204 struct inode *inode = file_inode(iocb->ki_filp); 5205 const loff_t orig_pos = iocb->ki_pos; 5206 const size_t orig_count = iov_iter_count(from); 5207 loff_t target_size; 5208 bool dio; 5209 bool may_need_sync = true; 5210 int preallocated; 5211 const loff_t pos = iocb->ki_pos; 5212 const ssize_t count = iov_iter_count(from); 5213 ssize_t ret; 5214 5215 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { 5216 ret = -EIO; 5217 goto out; 5218 } 5219 5220 if (!f2fs_is_compress_backend_ready(inode)) { 5221 ret = -EOPNOTSUPP; 5222 goto out; 5223 } 5224 5225 if (iocb->ki_flags & IOCB_NOWAIT) { 5226 if (!inode_trylock(inode)) { 5227 ret = -EAGAIN; 5228 goto out; 5229 } 5230 } else { 5231 inode_lock(inode); 5232 } 5233 5234 if (f2fs_is_pinned_file(inode) && 5235 !f2fs_overwrite_io(inode, pos, count)) { 5236 ret = -EIO; 5237 goto out_unlock; 5238 } 5239 5240 ret = f2fs_write_checks(iocb, from); 5241 if (ret <= 0) 5242 goto out_unlock; 5243 5244 /* Determine whether we will do a direct write or a buffered write. */ 5245 dio = f2fs_should_use_dio(inode, iocb, from); 5246 5247 /* dio is not compatible w/ atomic write */ 5248 if (dio && f2fs_is_atomic_file(inode)) { 5249 ret = -EOPNOTSUPP; 5250 goto out_unlock; 5251 } 5252 5253 /* Possibly preallocate the blocks for the write. */ 5254 target_size = iocb->ki_pos + iov_iter_count(from); 5255 preallocated = f2fs_preallocate_blocks(iocb, from, dio); 5256 if (preallocated < 0) { 5257 ret = preallocated; 5258 } else { 5259 if (trace_f2fs_datawrite_start_enabled()) 5260 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, 5261 orig_count, WRITE); 5262 5263 /* Do the actual write. */ 5264 ret = dio ? 
5265 f2fs_dio_write_iter(iocb, from, &may_need_sync) : 5266 f2fs_buffered_write_iter(iocb, from); 5267 5268 trace_f2fs_datawrite_end(inode, orig_pos, ret); 5269 } 5270 5271 /* Don't leave any preallocated blocks around past i_size. */ 5272 if (preallocated && i_size_read(inode) < target_size) { 5273 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 5274 filemap_invalidate_lock(inode->i_mapping); 5275 if (!f2fs_truncate(inode)) 5276 file_dont_truncate(inode); 5277 filemap_invalidate_unlock(inode->i_mapping); 5278 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 5279 } else { 5280 file_dont_truncate(inode); 5281 } 5282 5283 clear_inode_flag(inode, FI_PREALLOCATED_ALL); 5284 out_unlock: 5285 inode_unlock(inode); 5286 out: 5287 trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret); 5288 5289 if (ret > 0 && may_need_sync) 5290 ret = generic_write_sync(iocb, ret); 5291 5292 /* If buffered IO was forced, flush and drop the data from 5293 * the page cache to preserve O_DIRECT semantics 5294 */ 5295 if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT)) 5296 f2fs_flush_buffered_write(iocb->ki_filp->f_mapping, 5297 orig_pos, 5298 orig_pos + ret - 1); 5299 5300 return ret; 5301 } 5302 5303 static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len, 5304 int advice) 5305 { 5306 struct address_space *mapping; 5307 struct backing_dev_info *bdi; 5308 struct inode *inode = file_inode(filp); 5309 int err; 5310 5311 trace_f2fs_fadvise(inode, offset, len, advice); 5312 5313 if (advice == POSIX_FADV_SEQUENTIAL) { 5314 if (S_ISFIFO(inode->i_mode)) 5315 return -ESPIPE; 5316 5317 mapping = filp->f_mapping; 5318 if (!mapping || len < 0) 5319 return -EINVAL; 5320 5321 bdi = inode_to_bdi(mapping->host); 5322 filp->f_ra.ra_pages = bdi->ra_pages * 5323 F2FS_I_SB(inode)->seq_file_ra_mul; 5324 spin_lock(&filp->f_lock); 5325 filp->f_mode &= ~FMODE_RANDOM; 5326 spin_unlock(&filp->f_lock); 5327 return 0; 5328 } else if (advice == POSIX_FADV_WILLNEED && offset == 0) { 5329 
/* Load extent cache at the first readahead. */ 5330 f2fs_precache_extents(inode); 5331 } 5332 5333 err = generic_fadvise(filp, offset, len, advice); 5334 if (err) 5335 return err; 5336 5337 if (advice == POSIX_FADV_DONTNEED && 5338 (test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) && 5339 f2fs_compressed_file(inode))) 5340 f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino); 5341 else if (advice == POSIX_FADV_NOREUSE) 5342 err = f2fs_keep_noreuse_range(inode, offset, len); 5343 return err; 5344 } 5345 5346 #ifdef CONFIG_COMPAT 5347 struct compat_f2fs_gc_range { 5348 u32 sync; 5349 compat_u64 start; 5350 compat_u64 len; 5351 }; 5352 #define F2FS_IOC32_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11,\ 5353 struct compat_f2fs_gc_range) 5354 5355 static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg) 5356 { 5357 struct compat_f2fs_gc_range __user *urange; 5358 struct f2fs_gc_range range; 5359 int err; 5360 5361 urange = compat_ptr(arg); 5362 err = get_user(range.sync, &urange->sync); 5363 err |= get_user(range.start, &urange->start); 5364 err |= get_user(range.len, &urange->len); 5365 if (err) 5366 return -EFAULT; 5367 5368 return __f2fs_ioc_gc_range(file, &range); 5369 } 5370 5371 struct compat_f2fs_move_range { 5372 u32 dst_fd; 5373 compat_u64 pos_in; 5374 compat_u64 pos_out; 5375 compat_u64 len; 5376 }; 5377 #define F2FS_IOC32_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ 5378 struct compat_f2fs_move_range) 5379 5380 static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg) 5381 { 5382 struct compat_f2fs_move_range __user *urange; 5383 struct f2fs_move_range range; 5384 int err; 5385 5386 urange = compat_ptr(arg); 5387 err = get_user(range.dst_fd, &urange->dst_fd); 5388 err |= get_user(range.pos_in, &urange->pos_in); 5389 err |= get_user(range.pos_out, &urange->pos_out); 5390 err |= get_user(range.len, &urange->len); 5391 if (err) 5392 return -EFAULT; 5393 5394 return __f2fs_ioc_move_range(file, &range); 5395 } 5396 5397 long 
f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 5398 { 5399 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) 5400 return -EIO; 5401 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file)))) 5402 return -ENOSPC; 5403 5404 switch (cmd) { 5405 case FS_IOC32_GETVERSION: 5406 cmd = FS_IOC_GETVERSION; 5407 break; 5408 case F2FS_IOC32_GARBAGE_COLLECT_RANGE: 5409 return f2fs_compat_ioc_gc_range(file, arg); 5410 case F2FS_IOC32_MOVE_RANGE: 5411 return f2fs_compat_ioc_move_range(file, arg); 5412 case F2FS_IOC_START_ATOMIC_WRITE: 5413 case F2FS_IOC_START_ATOMIC_REPLACE: 5414 case F2FS_IOC_COMMIT_ATOMIC_WRITE: 5415 case F2FS_IOC_START_VOLATILE_WRITE: 5416 case F2FS_IOC_RELEASE_VOLATILE_WRITE: 5417 case F2FS_IOC_ABORT_ATOMIC_WRITE: 5418 case F2FS_IOC_SHUTDOWN: 5419 case FITRIM: 5420 case FS_IOC_SET_ENCRYPTION_POLICY: 5421 case FS_IOC_GET_ENCRYPTION_PWSALT: 5422 case FS_IOC_GET_ENCRYPTION_POLICY: 5423 case FS_IOC_GET_ENCRYPTION_POLICY_EX: 5424 case FS_IOC_ADD_ENCRYPTION_KEY: 5425 case FS_IOC_REMOVE_ENCRYPTION_KEY: 5426 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: 5427 case FS_IOC_GET_ENCRYPTION_KEY_STATUS: 5428 case FS_IOC_GET_ENCRYPTION_NONCE: 5429 case F2FS_IOC_GARBAGE_COLLECT: 5430 case F2FS_IOC_WRITE_CHECKPOINT: 5431 case F2FS_IOC_DEFRAGMENT: 5432 case F2FS_IOC_FLUSH_DEVICE: 5433 case F2FS_IOC_GET_FEATURES: 5434 case F2FS_IOC_GET_PIN_FILE: 5435 case F2FS_IOC_SET_PIN_FILE: 5436 case F2FS_IOC_PRECACHE_EXTENTS: 5437 case F2FS_IOC_RESIZE_FS: 5438 case FS_IOC_ENABLE_VERITY: 5439 case FS_IOC_MEASURE_VERITY: 5440 case FS_IOC_READ_VERITY_METADATA: 5441 case FS_IOC_GETFSLABEL: 5442 case FS_IOC_SETFSLABEL: 5443 case F2FS_IOC_GET_COMPRESS_BLOCKS: 5444 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: 5445 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: 5446 case F2FS_IOC_SEC_TRIM_FILE: 5447 case F2FS_IOC_GET_COMPRESS_OPTION: 5448 case F2FS_IOC_SET_COMPRESS_OPTION: 5449 case F2FS_IOC_DECOMPRESS_FILE: 5450 case F2FS_IOC_COMPRESS_FILE: 5451 case 
F2FS_IOC_GET_DEV_ALIAS_FILE: 5452 case F2FS_IOC_IO_PRIO: 5453 break; 5454 default: 5455 return -ENOIOCTLCMD; 5456 } 5457 return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); 5458 } 5459 #endif 5460 5461 const struct file_operations f2fs_file_operations = { 5462 .llseek = f2fs_llseek, 5463 .read_iter = f2fs_file_read_iter, 5464 .write_iter = f2fs_file_write_iter, 5465 .iopoll = iocb_bio_iopoll, 5466 .open = f2fs_file_open, 5467 .release = f2fs_release_file, 5468 .mmap_prepare = f2fs_file_mmap_prepare, 5469 .flush = f2fs_file_flush, 5470 .fsync = f2fs_sync_file, 5471 .fallocate = f2fs_fallocate, 5472 .unlocked_ioctl = f2fs_ioctl, 5473 #ifdef CONFIG_COMPAT 5474 .compat_ioctl = f2fs_compat_ioctl, 5475 #endif 5476 .splice_read = f2fs_file_splice_read, 5477 .splice_write = iter_file_splice_write, 5478 .fadvise = f2fs_file_fadvise, 5479 .fop_flags = FOP_BUFFER_RASYNC, 5480 .setlease = generic_setlease, 5481 }; 5482