1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * fs/f2fs/file.c 4 * 5 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 6 * http://www.samsung.com/ 7 */ 8 #include <linux/blk-crypto.h> 9 #include <linux/fs.h> 10 #include <linux/f2fs_fs.h> 11 #include <linux/stat.h> 12 #include <linux/writeback.h> 13 #include <linux/blkdev.h> 14 #include <linux/falloc.h> 15 #include <linux/filelock.h> 16 #include <linux/types.h> 17 #include <linux/compat.h> 18 #include <linux/uaccess.h> 19 #include <linux/mount.h> 20 #include <linux/pagevec.h> 21 #include <linux/uio.h> 22 #include <linux/uuid.h> 23 #include <linux/file.h> 24 #include <linux/nls.h> 25 #include <linux/sched/signal.h> 26 #include <linux/fileattr.h> 27 #include <linux/fadvise.h> 28 #include <linux/iomap.h> 29 30 #include "f2fs.h" 31 #include "node.h" 32 #include "segment.h" 33 #include "xattr.h" 34 #include "acl.h" 35 #include "gc.h" 36 #include "iostat.h" 37 #include <trace/events/f2fs.h> 38 #include <uapi/linux/f2fs.h> 39 40 static void f2fs_zero_post_eof_page(struct inode *inode, 41 loff_t new_size, bool lock) 42 { 43 loff_t old_size = i_size_read(inode); 44 45 if (old_size >= new_size) 46 return; 47 48 if (mapping_empty(inode->i_mapping)) 49 return; 50 51 if (lock) 52 filemap_invalidate_lock(inode->i_mapping); 53 /* zero or drop pages only in range of [old_size, new_size] */ 54 truncate_inode_pages_range(inode->i_mapping, old_size, new_size); 55 if (lock) 56 filemap_invalidate_unlock(inode->i_mapping); 57 } 58 59 static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) 60 { 61 struct inode *inode = file_inode(vmf->vma->vm_file); 62 vm_flags_t flags = vmf->vma->vm_flags; 63 vm_fault_t ret; 64 65 ret = filemap_fault(vmf); 66 if (ret & VM_FAULT_LOCKED) 67 f2fs_update_iostat(F2FS_I_SB(inode), inode, 68 APP_MAPPED_READ_IO, F2FS_BLKSIZE); 69 70 trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret); 71 72 return ret; 73 } 74 75 static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) 76 { 77 struct folio *folio = 
page_folio(vmf->page); 78 struct inode *inode = file_inode(vmf->vma->vm_file); 79 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 80 struct dnode_of_data dn; 81 bool need_alloc = !f2fs_is_pinned_file(inode); 82 int err = 0; 83 vm_fault_t ret; 84 85 if (unlikely(IS_IMMUTABLE(inode))) 86 return VM_FAULT_SIGBUS; 87 88 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 89 err = -EIO; 90 goto out; 91 } 92 93 if (unlikely(f2fs_cp_error(sbi))) { 94 err = -EIO; 95 goto out; 96 } 97 98 if (!f2fs_is_checkpoint_ready(sbi)) { 99 err = -ENOSPC; 100 goto out; 101 } 102 103 err = f2fs_convert_inline_inode(inode); 104 if (err) 105 goto out; 106 107 #ifdef CONFIG_F2FS_FS_COMPRESSION 108 if (f2fs_compressed_file(inode)) { 109 int ret = f2fs_is_compressed_cluster(inode, folio->index); 110 111 if (ret < 0) { 112 err = ret; 113 goto out; 114 } else if (ret) { 115 need_alloc = false; 116 } 117 } 118 #endif 119 /* should do out of any locked page */ 120 if (need_alloc) 121 f2fs_balance_fs(sbi, true); 122 123 sb_start_pagefault(inode->i_sb); 124 125 f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); 126 127 f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT, true); 128 129 file_update_time(vmf->vma->vm_file); 130 filemap_invalidate_lock_shared(inode->i_mapping); 131 132 folio_lock(folio); 133 if (unlikely(folio->mapping != inode->i_mapping || 134 folio_pos(folio) > i_size_read(inode) || 135 !folio_test_uptodate(folio))) { 136 folio_unlock(folio); 137 err = -EFAULT; 138 goto out_sem; 139 } 140 141 set_new_dnode(&dn, inode, NULL, NULL, 0); 142 if (need_alloc) { 143 /* block allocation */ 144 err = f2fs_get_block_locked(&dn, folio->index); 145 } else { 146 err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); 147 f2fs_put_dnode(&dn); 148 if (f2fs_is_pinned_file(inode) && 149 !__is_valid_data_blkaddr(dn.data_blkaddr)) 150 err = -EIO; 151 } 152 153 if (err) { 154 folio_unlock(folio); 155 goto out_sem; 156 } 157 158 f2fs_folio_wait_writeback(folio, DATA, false, true); 159 160 
/* wait for GCed page writeback via META_MAPPING */ 161 f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); 162 163 /* 164 * check to see if the page is mapped already (no holes) 165 */ 166 if (folio_test_mappedtodisk(folio)) 167 goto out_sem; 168 169 /* page is wholly or partially inside EOF */ 170 if (((loff_t)(folio->index + 1) << PAGE_SHIFT) > 171 i_size_read(inode)) { 172 loff_t offset; 173 174 offset = i_size_read(inode) & ~PAGE_MASK; 175 folio_zero_segment(folio, offset, folio_size(folio)); 176 } 177 folio_mark_dirty(folio); 178 179 f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE); 180 f2fs_update_time(sbi, REQ_TIME); 181 182 out_sem: 183 filemap_invalidate_unlock_shared(inode->i_mapping); 184 185 sb_end_pagefault(inode->i_sb); 186 out: 187 ret = vmf_fs_error(err); 188 189 trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret); 190 return ret; 191 } 192 193 static const struct vm_operations_struct f2fs_file_vm_ops = { 194 .fault = f2fs_filemap_fault, 195 .map_pages = filemap_map_pages, 196 .page_mkwrite = f2fs_vm_page_mkwrite, 197 }; 198 199 static int get_parent_ino(struct inode *inode, nid_t *pino) 200 { 201 struct dentry *dentry; 202 203 /* 204 * Make sure to get the non-deleted alias. The alias associated with 205 * the open file descriptor being fsync()'ed may be deleted already. 
206 */ 207 dentry = d_find_alias(inode); 208 if (!dentry) 209 return 0; 210 211 *pino = d_parent_ino(dentry); 212 dput(dentry); 213 return 1; 214 } 215 216 static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) 217 { 218 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 219 enum cp_reason_type cp_reason = CP_NO_NEEDED; 220 221 if (!S_ISREG(inode->i_mode)) 222 cp_reason = CP_NON_REGULAR; 223 else if (f2fs_compressed_file(inode)) 224 cp_reason = CP_COMPRESSED; 225 else if (inode->i_nlink != 1) 226 cp_reason = CP_HARDLINK; 227 else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) 228 cp_reason = CP_SB_NEED_CP; 229 else if (file_wrong_pino(inode)) 230 cp_reason = CP_WRONG_PINO; 231 else if (!f2fs_space_for_roll_forward(sbi)) 232 cp_reason = CP_NO_SPC_ROLL; 233 else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) 234 cp_reason = CP_NODE_NEED_CP; 235 else if (test_opt(sbi, FASTBOOT)) 236 cp_reason = CP_FASTBOOT_MODE; 237 else if (F2FS_OPTION(sbi).active_logs == 2) 238 cp_reason = CP_SPEC_LOG_NUM; 239 else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && 240 f2fs_need_dentry_mark(sbi, inode->i_ino) && 241 f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, 242 TRANS_DIR_INO)) 243 cp_reason = CP_RECOVER_DIR; 244 else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, 245 XATTR_DIR_INO)) 246 cp_reason = CP_XATTR_DIR; 247 248 return cp_reason; 249 } 250 251 static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) 252 { 253 struct folio *i = filemap_get_folio(NODE_MAPPING(sbi), ino); 254 bool ret = false; 255 /* But we need to avoid that there are some inode updates */ 256 if ((!IS_ERR(i) && folio_test_dirty(i)) || 257 f2fs_need_inode_block_update(sbi, ino)) 258 ret = true; 259 f2fs_folio_put(i, false); 260 return ret; 261 } 262 263 static void try_to_fix_pino(struct inode *inode) 264 { 265 struct f2fs_inode_info *fi = F2FS_I(inode); 266 nid_t pino; 267 268 f2fs_down_write(&fi->i_sem); 269 if (file_wrong_pino(inode) && inode->i_nlink 
== 1 && 270 get_parent_ino(inode, &pino)) { 271 f2fs_i_pino_write(inode, pino); 272 file_got_pino(inode); 273 } 274 f2fs_up_write(&fi->i_sem); 275 } 276 277 static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, 278 int datasync, bool atomic) 279 { 280 struct inode *inode = file->f_mapping->host; 281 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 282 nid_t ino = inode->i_ino; 283 int ret = 0; 284 enum cp_reason_type cp_reason = 0; 285 struct writeback_control wbc = { 286 .sync_mode = WB_SYNC_ALL, 287 .nr_to_write = LONG_MAX, 288 }; 289 unsigned int seq_id = 0; 290 291 if (unlikely(f2fs_readonly(inode->i_sb))) 292 return 0; 293 294 trace_f2fs_sync_file_enter(inode); 295 296 if (S_ISDIR(inode->i_mode)) 297 goto go_write; 298 299 /* if fdatasync is triggered, let's do in-place-update */ 300 if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) 301 set_inode_flag(inode, FI_NEED_IPU); 302 ret = file_write_and_wait_range(file, start, end); 303 clear_inode_flag(inode, FI_NEED_IPU); 304 305 if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { 306 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); 307 return ret; 308 } 309 310 /* if the inode is dirty, let's recover all the time */ 311 if (!f2fs_skip_inode_update(inode, datasync)) { 312 f2fs_write_inode(inode, NULL); 313 goto go_write; 314 } 315 316 /* 317 * if there is no written data, don't waste time to write recovery info. 318 */ 319 if (!is_inode_flag_set(inode, FI_APPEND_WRITE) && 320 !f2fs_exist_written_data(sbi, ino, APPEND_INO)) { 321 322 /* it may call write_inode just prior to fsync */ 323 if (need_inode_page_update(sbi, ino)) 324 goto go_write; 325 326 if (is_inode_flag_set(inode, FI_UPDATE_WRITE) || 327 f2fs_exist_written_data(sbi, ino, UPDATE_INO)) 328 goto flush_out; 329 goto out; 330 } else { 331 /* 332 * for OPU case, during fsync(), node can be persisted before 333 * data when lower device doesn't support write barrier, result 334 * in data corruption after SPO. 
335 * So for strict fsync mode, force to use atomic write semantics 336 * to keep write order in between data/node and last node to 337 * avoid potential data corruption. 338 */ 339 if (F2FS_OPTION(sbi).fsync_mode == 340 FSYNC_MODE_STRICT && !atomic) 341 atomic = true; 342 } 343 go_write: 344 /* 345 * Both of fdatasync() and fsync() are able to be recovered from 346 * sudden-power-off. 347 */ 348 f2fs_down_read(&F2FS_I(inode)->i_sem); 349 cp_reason = need_do_checkpoint(inode); 350 f2fs_up_read(&F2FS_I(inode)->i_sem); 351 352 if (cp_reason) { 353 /* all the dirty node pages should be flushed for POR */ 354 ret = f2fs_sync_fs(inode->i_sb, 1); 355 356 /* 357 * We've secured consistency through sync_fs. Following pino 358 * will be used only for fsynced inodes after checkpoint. 359 */ 360 try_to_fix_pino(inode); 361 clear_inode_flag(inode, FI_APPEND_WRITE); 362 clear_inode_flag(inode, FI_UPDATE_WRITE); 363 goto out; 364 } 365 sync_nodes: 366 atomic_inc(&sbi->wb_sync_req[NODE]); 367 ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id); 368 atomic_dec(&sbi->wb_sync_req[NODE]); 369 if (ret) 370 goto out; 371 372 /* if cp_error was enabled, we should avoid infinite loop */ 373 if (unlikely(f2fs_cp_error(sbi))) { 374 ret = -EIO; 375 goto out; 376 } 377 378 if (f2fs_need_inode_block_update(sbi, ino)) { 379 f2fs_mark_inode_dirty_sync(inode, true); 380 f2fs_write_inode(inode, NULL); 381 goto sync_nodes; 382 } 383 384 /* 385 * If it's atomic_write, it's just fine to keep write ordering. So 386 * here we don't need to wait for node write completion, since we use 387 * node chain which serializes node blocks. If one of node writes are 388 * reordered, we can see simply broken chain, resulting in stopping 389 * roll-forward recovery. It means we'll recover all or none node blocks 390 * given fsync mark. 
391 */ 392 if (!atomic) { 393 ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id); 394 if (ret) 395 goto out; 396 } 397 398 /* once recovery info is written, don't need to tack this */ 399 f2fs_remove_ino_entry(sbi, ino, APPEND_INO); 400 clear_inode_flag(inode, FI_APPEND_WRITE); 401 flush_out: 402 if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) 403 ret = f2fs_issue_flush(sbi, inode->i_ino); 404 if (!ret) { 405 f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); 406 clear_inode_flag(inode, FI_UPDATE_WRITE); 407 f2fs_remove_ino_entry(sbi, ino, FLUSH_INO); 408 } 409 f2fs_update_time(sbi, REQ_TIME); 410 out: 411 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); 412 return ret; 413 } 414 415 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 416 { 417 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) 418 return -EIO; 419 return f2fs_do_sync_file(file, start, end, datasync, false); 420 } 421 422 static bool __found_offset(struct address_space *mapping, 423 struct dnode_of_data *dn, pgoff_t index, int whence) 424 { 425 block_t blkaddr = f2fs_data_blkaddr(dn); 426 struct inode *inode = mapping->host; 427 bool compressed_cluster = false; 428 429 if (f2fs_compressed_file(inode)) { 430 block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_folio, 431 ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size)); 432 433 compressed_cluster = first_blkaddr == COMPRESS_ADDR; 434 } 435 436 switch (whence) { 437 case SEEK_DATA: 438 if (__is_valid_data_blkaddr(blkaddr)) 439 return true; 440 if (blkaddr == NEW_ADDR && 441 xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY)) 442 return true; 443 if (compressed_cluster) 444 return true; 445 break; 446 case SEEK_HOLE: 447 if (compressed_cluster) 448 return false; 449 if (blkaddr == NULL_ADDR) 450 return true; 451 break; 452 } 453 return false; 454 } 455 456 static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) 457 { 458 struct inode *inode = 
file->f_mapping->host; 459 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 460 struct dnode_of_data dn; 461 pgoff_t pgofs, end_offset; 462 loff_t data_ofs = offset; 463 loff_t isize; 464 int err = 0; 465 466 inode_lock_shared(inode); 467 468 isize = i_size_read(inode); 469 if (offset >= isize) 470 goto fail; 471 472 /* handle inline data case */ 473 if (f2fs_has_inline_data(inode)) { 474 if (whence == SEEK_HOLE) { 475 data_ofs = isize; 476 goto found; 477 } else if (whence == SEEK_DATA) { 478 data_ofs = offset; 479 goto found; 480 } 481 } 482 483 pgofs = (pgoff_t)(offset >> PAGE_SHIFT); 484 485 for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) { 486 set_new_dnode(&dn, inode, NULL, NULL, 0); 487 err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); 488 if (err && err != -ENOENT) { 489 goto fail; 490 } else if (err == -ENOENT) { 491 /* direct node does not exists */ 492 if (whence == SEEK_DATA) { 493 pgofs = f2fs_get_next_page_offset(&dn, pgofs); 494 continue; 495 } else { 496 goto found; 497 } 498 } 499 500 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 501 502 /* find data/hole in dnode block */ 503 for (; dn.ofs_in_node < end_offset; 504 dn.ofs_in_node++, pgofs++, 505 data_ofs = (loff_t)pgofs << PAGE_SHIFT) { 506 block_t blkaddr; 507 508 blkaddr = f2fs_data_blkaddr(&dn); 509 510 if (__is_valid_data_blkaddr(blkaddr) && 511 !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), 512 blkaddr, DATA_GENERIC_ENHANCE)) { 513 f2fs_put_dnode(&dn); 514 goto fail; 515 } 516 517 if (__found_offset(file->f_mapping, &dn, 518 pgofs, whence)) { 519 f2fs_put_dnode(&dn); 520 goto found; 521 } 522 } 523 f2fs_put_dnode(&dn); 524 } 525 526 if (whence == SEEK_DATA) 527 goto fail; 528 found: 529 if (whence == SEEK_HOLE && data_ofs > isize) 530 data_ofs = isize; 531 inode_unlock_shared(inode); 532 return vfs_setpos(file, data_ofs, maxbytes); 533 fail: 534 inode_unlock_shared(inode); 535 return -ENXIO; 536 } 537 538 static loff_t f2fs_llseek(struct file *file, loff_t 
offset, int whence) 539 { 540 struct inode *inode = file->f_mapping->host; 541 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 542 543 switch (whence) { 544 case SEEK_SET: 545 case SEEK_CUR: 546 case SEEK_END: 547 return generic_file_llseek_size(file, offset, whence, 548 maxbytes, i_size_read(inode)); 549 case SEEK_DATA: 550 case SEEK_HOLE: 551 if (offset < 0) 552 return -ENXIO; 553 return f2fs_seek_block(file, offset, whence); 554 } 555 556 return -EINVAL; 557 } 558 559 static int f2fs_file_mmap_prepare(struct vm_area_desc *desc) 560 { 561 struct file *file = desc->file; 562 struct inode *inode = file_inode(file); 563 564 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 565 return -EIO; 566 567 if (!f2fs_is_compress_backend_ready(inode)) 568 return -EOPNOTSUPP; 569 570 file_accessed(file); 571 desc->vm_ops = &f2fs_file_vm_ops; 572 573 f2fs_down_read(&F2FS_I(inode)->i_sem); 574 set_inode_flag(inode, FI_MMAP_FILE); 575 f2fs_up_read(&F2FS_I(inode)->i_sem); 576 577 return 0; 578 } 579 580 static int finish_preallocate_blocks(struct inode *inode) 581 { 582 int ret = 0; 583 bool opened; 584 585 f2fs_down_read(&F2FS_I(inode)->i_sem); 586 opened = is_inode_flag_set(inode, FI_OPENED_FILE); 587 f2fs_up_read(&F2FS_I(inode)->i_sem); 588 if (opened) 589 return 0; 590 591 inode_lock(inode); 592 if (is_inode_flag_set(inode, FI_OPENED_FILE)) 593 goto out_unlock; 594 595 if (!file_should_truncate(inode)) 596 goto out_update; 597 598 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 599 filemap_invalidate_lock(inode->i_mapping); 600 601 truncate_setsize(inode, i_size_read(inode)); 602 ret = f2fs_truncate(inode); 603 604 filemap_invalidate_unlock(inode->i_mapping); 605 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 606 if (ret) 607 goto out_unlock; 608 609 file_dont_truncate(inode); 610 out_update: 611 f2fs_down_write(&F2FS_I(inode)->i_sem); 612 set_inode_flag(inode, FI_OPENED_FILE); 613 f2fs_up_write(&F2FS_I(inode)->i_sem); 614 out_unlock: 615 inode_unlock(inode); 
616 return ret; 617 } 618 619 static int f2fs_file_open(struct inode *inode, struct file *filp) 620 { 621 int err = fscrypt_file_open(inode, filp); 622 623 if (err) 624 return err; 625 626 if (!f2fs_is_compress_backend_ready(inode)) 627 return -EOPNOTSUPP; 628 629 err = fsverity_file_open(inode, filp); 630 if (err) 631 return err; 632 633 filp->f_mode |= FMODE_NOWAIT; 634 filp->f_mode |= FMODE_CAN_ODIRECT; 635 636 err = dquot_file_open(inode, filp); 637 if (err) 638 return err; 639 640 err = finish_preallocate_blocks(inode); 641 if (!err) 642 atomic_inc(&F2FS_I(inode)->open_count); 643 return err; 644 } 645 646 void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) 647 { 648 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 649 int nr_free = 0, ofs = dn->ofs_in_node, len = count; 650 __le32 *addr; 651 bool compressed_cluster = false; 652 int cluster_index = 0, valid_blocks = 0; 653 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 654 bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks); 655 block_t blkstart; 656 int blklen = 0; 657 658 addr = get_dnode_addr(dn->inode, dn->node_folio) + ofs; 659 blkstart = le32_to_cpu(*addr); 660 661 /* Assumption: truncation starts with cluster */ 662 for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { 663 block_t blkaddr = le32_to_cpu(*addr); 664 665 if (f2fs_compressed_file(dn->inode) && 666 !(cluster_index & (cluster_size - 1))) { 667 if (compressed_cluster) 668 f2fs_i_compr_blocks_update(dn->inode, 669 valid_blocks, false); 670 compressed_cluster = (blkaddr == COMPRESS_ADDR); 671 valid_blocks = 0; 672 } 673 674 if (blkaddr == NULL_ADDR) 675 goto next; 676 677 f2fs_set_data_blkaddr(dn, NULL_ADDR); 678 679 if (__is_valid_data_blkaddr(blkaddr)) { 680 if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE)) 681 goto next; 682 if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr, 683 DATA_GENERIC_ENHANCE)) 684 goto next; 685 if (compressed_cluster) 686 valid_blocks++; 687 } 688 689 if 
(blkstart + blklen == blkaddr) { 690 blklen++; 691 } else { 692 f2fs_invalidate_blocks(sbi, blkstart, blklen); 693 blkstart = blkaddr; 694 blklen = 1; 695 } 696 697 if (!released || blkaddr != COMPRESS_ADDR) 698 nr_free++; 699 700 continue; 701 702 next: 703 if (blklen) 704 f2fs_invalidate_blocks(sbi, blkstart, blklen); 705 706 blkstart = le32_to_cpu(*(addr + 1)); 707 blklen = 0; 708 } 709 710 if (blklen) 711 f2fs_invalidate_blocks(sbi, blkstart, blklen); 712 713 if (compressed_cluster) 714 f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false); 715 716 if (nr_free) { 717 pgoff_t fofs; 718 /* 719 * once we invalidate valid blkaddr in range [ofs, ofs + count], 720 * we will invalidate all blkaddr in the whole range. 721 */ 722 fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), 723 dn->inode) + ofs; 724 f2fs_update_read_extent_cache_range(dn, fofs, 0, len); 725 f2fs_update_age_extent_cache_range(dn, fofs, len); 726 dec_valid_block_count(sbi, dn->inode, nr_free); 727 } 728 dn->ofs_in_node = ofs; 729 730 f2fs_update_time(sbi, REQ_TIME); 731 trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, 732 dn->ofs_in_node, nr_free); 733 } 734 735 static int truncate_partial_data_page(struct inode *inode, u64 from, 736 bool cache_only) 737 { 738 loff_t offset = from & (PAGE_SIZE - 1); 739 pgoff_t index = from >> PAGE_SHIFT; 740 struct address_space *mapping = inode->i_mapping; 741 struct folio *folio; 742 743 if (!offset && !cache_only) 744 return 0; 745 746 if (cache_only) { 747 folio = filemap_lock_folio(mapping, index); 748 if (IS_ERR(folio)) 749 return 0; 750 if (folio_test_uptodate(folio)) 751 goto truncate_out; 752 f2fs_folio_put(folio, true); 753 return 0; 754 } 755 756 folio = f2fs_get_lock_data_folio(inode, index, true); 757 if (IS_ERR(folio)) 758 return PTR_ERR(folio) == -ENOENT ? 
0 : PTR_ERR(folio); 759 truncate_out: 760 f2fs_folio_wait_writeback(folio, DATA, true, true); 761 folio_zero_segment(folio, offset, folio_size(folio)); 762 763 /* An encrypted inode should have a key and truncate the last page. */ 764 f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode)); 765 if (!cache_only) 766 folio_mark_dirty(folio); 767 f2fs_folio_put(folio, true); 768 return 0; 769 } 770 771 int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) 772 { 773 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 774 struct dnode_of_data dn; 775 pgoff_t free_from; 776 int count = 0, err = 0; 777 struct folio *ifolio; 778 bool truncate_page = false; 779 780 trace_f2fs_truncate_blocks_enter(inode, from); 781 782 if (IS_DEVICE_ALIASING(inode) && from) { 783 err = -EINVAL; 784 goto out_err; 785 } 786 787 free_from = (pgoff_t)F2FS_BLK_ALIGN(from); 788 789 if (free_from >= max_file_blocks(inode)) 790 goto free_partial; 791 792 if (lock) 793 f2fs_lock_op(sbi); 794 795 ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); 796 if (IS_ERR(ifolio)) { 797 err = PTR_ERR(ifolio); 798 goto out; 799 } 800 801 if (IS_DEVICE_ALIASING(inode)) { 802 struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; 803 struct extent_info ei = et->largest; 804 805 f2fs_invalidate_blocks(sbi, ei.blk, ei.len); 806 807 dec_valid_block_count(sbi, inode, ei.len); 808 f2fs_update_time(sbi, REQ_TIME); 809 810 f2fs_folio_put(ifolio, true); 811 goto out; 812 } 813 814 if (f2fs_has_inline_data(inode)) { 815 f2fs_truncate_inline_inode(inode, ifolio, from); 816 f2fs_folio_put(ifolio, true); 817 truncate_page = true; 818 goto out; 819 } 820 821 set_new_dnode(&dn, inode, ifolio, NULL, 0); 822 err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); 823 if (err) { 824 if (err == -ENOENT) 825 goto free_next; 826 goto out; 827 } 828 829 count = ADDRS_PER_PAGE(dn.node_folio, inode); 830 831 count -= dn.ofs_in_node; 832 f2fs_bug_on(sbi, count < 0); 833 834 if (dn.ofs_in_node || 
IS_INODE(dn.node_folio)) { 835 f2fs_truncate_data_blocks_range(&dn, count); 836 free_from += count; 837 } 838 839 f2fs_put_dnode(&dn); 840 free_next: 841 err = f2fs_truncate_inode_blocks(inode, free_from); 842 out: 843 if (lock) 844 f2fs_unlock_op(sbi); 845 free_partial: 846 /* lastly zero out the first data page */ 847 if (!err) 848 err = truncate_partial_data_page(inode, from, truncate_page); 849 out_err: 850 trace_f2fs_truncate_blocks_exit(inode, err); 851 return err; 852 } 853 854 int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) 855 { 856 u64 free_from = from; 857 int err; 858 859 #ifdef CONFIG_F2FS_FS_COMPRESSION 860 /* 861 * for compressed file, only support cluster size 862 * aligned truncation. 863 */ 864 if (f2fs_compressed_file(inode)) 865 free_from = round_up(from, 866 F2FS_I(inode)->i_cluster_size << PAGE_SHIFT); 867 #endif 868 869 err = f2fs_do_truncate_blocks(inode, free_from, lock); 870 if (err) 871 return err; 872 873 #ifdef CONFIG_F2FS_FS_COMPRESSION 874 /* 875 * For compressed file, after release compress blocks, don't allow write 876 * direct, but we should allow write direct after truncate to zero. 
877 */ 878 if (f2fs_compressed_file(inode) && !free_from 879 && is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) 880 clear_inode_flag(inode, FI_COMPRESS_RELEASED); 881 882 if (from != free_from) { 883 err = f2fs_truncate_partial_cluster(inode, from, lock); 884 if (err) 885 return err; 886 } 887 #endif 888 889 return 0; 890 } 891 892 int f2fs_truncate(struct inode *inode) 893 { 894 int err; 895 896 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 897 return -EIO; 898 899 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 900 S_ISLNK(inode->i_mode))) 901 return 0; 902 903 trace_f2fs_truncate(inode); 904 905 if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) 906 return -EIO; 907 908 err = f2fs_dquot_initialize(inode); 909 if (err) 910 return err; 911 912 /* we should check inline_data size */ 913 if (!f2fs_may_inline_data(inode)) { 914 err = f2fs_convert_inline_inode(inode); 915 if (err) { 916 /* 917 * Always truncate page #0 to avoid page cache 918 * leak in evict() path. 919 */ 920 truncate_inode_pages_range(inode->i_mapping, 921 F2FS_BLK_TO_BYTES(0), 922 F2FS_BLK_END_BYTES(0)); 923 return err; 924 } 925 } 926 927 err = f2fs_truncate_blocks(inode, i_size_read(inode), true); 928 if (err) 929 return err; 930 931 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 932 f2fs_mark_inode_dirty_sync(inode, false); 933 return 0; 934 } 935 936 static bool f2fs_force_buffered_io(struct inode *inode, int rw) 937 { 938 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 939 940 if (!fscrypt_dio_supported(inode)) 941 return true; 942 if (fsverity_active(inode)) 943 return true; 944 if (f2fs_compressed_file(inode)) 945 return true; 946 /* 947 * only force direct read to use buffered IO, for direct write, 948 * it expects inline data conversion before committing IO. 
949 */ 950 if (f2fs_has_inline_data(inode) && rw == READ) 951 return true; 952 953 /* disallow direct IO if any of devices has unaligned blksize */ 954 if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) 955 return true; 956 /* 957 * for blkzoned device, fallback direct IO to buffered IO, so 958 * all IOs can be serialized by log-structured write. 959 */ 960 if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) && 961 !f2fs_is_pinned_file(inode)) 962 return true; 963 if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) 964 return true; 965 966 return false; 967 } 968 969 int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path, 970 struct kstat *stat, u32 request_mask, unsigned int query_flags) 971 { 972 struct inode *inode = d_inode(path->dentry); 973 struct f2fs_inode_info *fi = F2FS_I(inode); 974 struct f2fs_inode *ri = NULL; 975 unsigned int flags; 976 977 if (f2fs_has_extra_attr(inode) && 978 f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && 979 F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { 980 stat->result_mask |= STATX_BTIME; 981 stat->btime.tv_sec = fi->i_crtime.tv_sec; 982 stat->btime.tv_nsec = fi->i_crtime.tv_nsec; 983 } 984 985 /* 986 * Return the DIO alignment restrictions if requested. We only return 987 * this information when requested, since on encrypted files it might 988 * take a fair bit of work to get if the file wasn't opened recently. 989 * 990 * f2fs sometimes supports DIO reads but not DIO writes. STATX_DIOALIGN 991 * cannot represent that, so in that case we report no DIO support. 
992 */ 993 if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) { 994 unsigned int bsize = i_blocksize(inode); 995 996 stat->result_mask |= STATX_DIOALIGN; 997 if (!f2fs_force_buffered_io(inode, WRITE)) { 998 stat->dio_mem_align = bsize; 999 stat->dio_offset_align = bsize; 1000 } 1001 } 1002 1003 flags = fi->i_flags; 1004 if (flags & F2FS_COMPR_FL) 1005 stat->attributes |= STATX_ATTR_COMPRESSED; 1006 if (flags & F2FS_APPEND_FL) 1007 stat->attributes |= STATX_ATTR_APPEND; 1008 if (IS_ENCRYPTED(inode)) 1009 stat->attributes |= STATX_ATTR_ENCRYPTED; 1010 if (flags & F2FS_IMMUTABLE_FL) 1011 stat->attributes |= STATX_ATTR_IMMUTABLE; 1012 if (flags & F2FS_NODUMP_FL) 1013 stat->attributes |= STATX_ATTR_NODUMP; 1014 if (IS_VERITY(inode)) 1015 stat->attributes |= STATX_ATTR_VERITY; 1016 1017 stat->attributes_mask |= (STATX_ATTR_COMPRESSED | 1018 STATX_ATTR_APPEND | 1019 STATX_ATTR_ENCRYPTED | 1020 STATX_ATTR_IMMUTABLE | 1021 STATX_ATTR_NODUMP | 1022 STATX_ATTR_VERITY); 1023 1024 generic_fillattr(idmap, request_mask, inode, stat); 1025 1026 /* we need to show initial sectors used for inline_data/dentries */ 1027 if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) || 1028 f2fs_has_inline_dentry(inode)) 1029 stat->blocks += (stat->size + 511) >> 9; 1030 1031 return 0; 1032 } 1033 1034 #ifdef CONFIG_F2FS_FS_POSIX_ACL 1035 static void __setattr_copy(struct mnt_idmap *idmap, 1036 struct inode *inode, const struct iattr *attr) 1037 { 1038 unsigned int ia_valid = attr->ia_valid; 1039 1040 i_uid_update(idmap, attr, inode); 1041 i_gid_update(idmap, attr, inode); 1042 if (ia_valid & ATTR_ATIME) 1043 inode_set_atime_to_ts(inode, attr->ia_atime); 1044 if (ia_valid & ATTR_MTIME) 1045 inode_set_mtime_to_ts(inode, attr->ia_mtime); 1046 if (ia_valid & ATTR_CTIME) 1047 inode_set_ctime_to_ts(inode, attr->ia_ctime); 1048 if (ia_valid & ATTR_MODE) { 1049 umode_t mode = attr->ia_mode; 1050 1051 if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) 1052 mode 
&= ~S_ISGID; 1053 set_acl_inode(inode, mode); 1054 } 1055 } 1056 #else 1057 #define __setattr_copy setattr_copy 1058 #endif 1059 1060 int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 1061 struct iattr *attr) 1062 { 1063 struct inode *inode = d_inode(dentry); 1064 struct f2fs_inode_info *fi = F2FS_I(inode); 1065 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1066 int err; 1067 1068 if (unlikely(f2fs_cp_error(sbi))) 1069 return -EIO; 1070 1071 err = setattr_prepare(idmap, dentry, attr); 1072 if (err) 1073 return err; 1074 1075 err = fscrypt_prepare_setattr(dentry, attr); 1076 if (err) 1077 return err; 1078 1079 if (unlikely(IS_IMMUTABLE(inode))) 1080 return -EPERM; 1081 1082 if (unlikely(IS_APPEND(inode) && 1083 (attr->ia_valid & (ATTR_MODE | ATTR_UID | 1084 ATTR_GID | ATTR_TIMES_SET)))) 1085 return -EPERM; 1086 1087 if ((attr->ia_valid & ATTR_SIZE)) { 1088 if (!f2fs_is_compress_backend_ready(inode) || 1089 IS_DEVICE_ALIASING(inode)) 1090 return -EOPNOTSUPP; 1091 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && 1092 !IS_ALIGNED(attr->ia_size, 1093 F2FS_BLK_TO_BYTES(fi->i_cluster_size))) 1094 return -EINVAL; 1095 /* 1096 * To prevent scattered pin block generation, we don't allow 1097 * smaller/equal size unaligned truncation for pinned file. 1098 * We only support overwrite IO to pinned file, so don't 1099 * care about larger size truncation. 
1100 */ 1101 if (f2fs_is_pinned_file(inode) && 1102 attr->ia_size <= i_size_read(inode) && 1103 !IS_ALIGNED(attr->ia_size, 1104 F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi)))) 1105 return -EINVAL; 1106 } 1107 1108 if (is_quota_modification(idmap, inode, attr)) { 1109 err = f2fs_dquot_initialize(inode); 1110 if (err) 1111 return err; 1112 } 1113 if (i_uid_needs_update(idmap, attr, inode) || 1114 i_gid_needs_update(idmap, attr, inode)) { 1115 f2fs_lock_op(sbi); 1116 err = dquot_transfer(idmap, inode, attr); 1117 if (err) { 1118 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); 1119 f2fs_unlock_op(sbi); 1120 return err; 1121 } 1122 /* 1123 * update uid/gid under lock_op(), so that dquot and inode can 1124 * be updated atomically. 1125 */ 1126 i_uid_update(idmap, attr, inode); 1127 i_gid_update(idmap, attr, inode); 1128 f2fs_mark_inode_dirty_sync(inode, true); 1129 f2fs_unlock_op(sbi); 1130 } 1131 1132 if (attr->ia_valid & ATTR_SIZE) { 1133 loff_t old_size = i_size_read(inode); 1134 1135 if (attr->ia_size > MAX_INLINE_DATA(inode)) { 1136 /* 1137 * should convert inline inode before i_size_write to 1138 * keep smaller than inline_data size with inline flag. 1139 */ 1140 err = f2fs_convert_inline_inode(inode); 1141 if (err) 1142 return err; 1143 } 1144 1145 /* 1146 * wait for inflight dio, blocks should be removed after 1147 * IO completion. 1148 */ 1149 if (attr->ia_size < old_size) 1150 inode_dio_wait(inode); 1151 1152 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 1153 filemap_invalidate_lock(inode->i_mapping); 1154 1155 if (attr->ia_size > old_size) 1156 f2fs_zero_post_eof_page(inode, attr->ia_size, false); 1157 truncate_setsize(inode, attr->ia_size); 1158 1159 if (attr->ia_size <= old_size) 1160 err = f2fs_truncate(inode); 1161 /* 1162 * do not trim all blocks after i_size if target size is 1163 * larger than i_size. 
1164 */ 1165 filemap_invalidate_unlock(inode->i_mapping); 1166 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 1167 if (err) 1168 return err; 1169 1170 spin_lock(&fi->i_size_lock); 1171 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 1172 fi->last_disk_size = i_size_read(inode); 1173 spin_unlock(&fi->i_size_lock); 1174 } 1175 1176 __setattr_copy(idmap, inode, attr); 1177 1178 if (attr->ia_valid & ATTR_MODE) { 1179 err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode)); 1180 1181 if (is_inode_flag_set(inode, FI_ACL_MODE)) { 1182 if (!err) 1183 inode->i_mode = fi->i_acl_mode; 1184 clear_inode_flag(inode, FI_ACL_MODE); 1185 } 1186 } 1187 1188 /* file size may changed here */ 1189 f2fs_mark_inode_dirty_sync(inode, true); 1190 1191 /* inode change will produce dirty node pages flushed by checkpoint */ 1192 f2fs_balance_fs(sbi, true); 1193 1194 return err; 1195 } 1196 1197 const struct inode_operations f2fs_file_inode_operations = { 1198 .getattr = f2fs_getattr, 1199 .setattr = f2fs_setattr, 1200 .get_inode_acl = f2fs_get_acl, 1201 .set_acl = f2fs_set_acl, 1202 .listxattr = f2fs_listxattr, 1203 .fiemap = f2fs_fiemap, 1204 .fileattr_get = f2fs_fileattr_get, 1205 .fileattr_set = f2fs_fileattr_set, 1206 }; 1207 1208 static int fill_zero(struct inode *inode, pgoff_t index, 1209 loff_t start, loff_t len) 1210 { 1211 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1212 struct folio *folio; 1213 1214 if (!len) 1215 return 0; 1216 1217 f2fs_balance_fs(sbi, true); 1218 1219 f2fs_lock_op(sbi); 1220 folio = f2fs_get_new_data_folio(inode, NULL, index, false); 1221 f2fs_unlock_op(sbi); 1222 1223 if (IS_ERR(folio)) 1224 return PTR_ERR(folio); 1225 1226 f2fs_folio_wait_writeback(folio, DATA, true, true); 1227 folio_zero_range(folio, start, len); 1228 folio_mark_dirty(folio); 1229 f2fs_folio_put(folio, true); 1230 return 0; 1231 } 1232 1233 int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) 1234 { 1235 int err; 1236 1237 while (pg_start < pg_end) { 
1238 struct dnode_of_data dn; 1239 pgoff_t end_offset, count; 1240 1241 set_new_dnode(&dn, inode, NULL, NULL, 0); 1242 err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); 1243 if (err) { 1244 if (err == -ENOENT) { 1245 pg_start = f2fs_get_next_page_offset(&dn, 1246 pg_start); 1247 continue; 1248 } 1249 return err; 1250 } 1251 1252 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 1253 count = min(end_offset - dn.ofs_in_node, pg_end - pg_start); 1254 1255 f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); 1256 1257 f2fs_truncate_data_blocks_range(&dn, count); 1258 f2fs_put_dnode(&dn); 1259 1260 pg_start += count; 1261 } 1262 return 0; 1263 } 1264 1265 static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len) 1266 { 1267 pgoff_t pg_start, pg_end; 1268 loff_t off_start, off_end; 1269 int ret; 1270 1271 ret = f2fs_convert_inline_inode(inode); 1272 if (ret) 1273 return ret; 1274 1275 f2fs_zero_post_eof_page(inode, offset + len, true); 1276 1277 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; 1278 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; 1279 1280 off_start = offset & (PAGE_SIZE - 1); 1281 off_end = (offset + len) & (PAGE_SIZE - 1); 1282 1283 if (pg_start == pg_end) { 1284 ret = fill_zero(inode, pg_start, off_start, 1285 off_end - off_start); 1286 if (ret) 1287 return ret; 1288 } else { 1289 if (off_start) { 1290 ret = fill_zero(inode, pg_start++, off_start, 1291 PAGE_SIZE - off_start); 1292 if (ret) 1293 return ret; 1294 } 1295 if (off_end) { 1296 ret = fill_zero(inode, pg_end, 0, off_end); 1297 if (ret) 1298 return ret; 1299 } 1300 1301 if (pg_start < pg_end) { 1302 loff_t blk_start, blk_end; 1303 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1304 1305 f2fs_balance_fs(sbi, true); 1306 1307 blk_start = (loff_t)pg_start << PAGE_SHIFT; 1308 blk_end = (loff_t)pg_end << PAGE_SHIFT; 1309 1310 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1311 filemap_invalidate_lock(inode->i_mapping); 1312 1313 
truncate_pagecache_range(inode, blk_start, blk_end - 1); 1314 1315 f2fs_lock_op(sbi); 1316 ret = f2fs_truncate_hole(inode, pg_start, pg_end); 1317 f2fs_unlock_op(sbi); 1318 1319 filemap_invalidate_unlock(inode->i_mapping); 1320 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1321 } 1322 } 1323 1324 return ret; 1325 } 1326 1327 static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, 1328 int *do_replace, pgoff_t off, pgoff_t len) 1329 { 1330 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1331 struct dnode_of_data dn; 1332 int ret, done, i; 1333 1334 next_dnode: 1335 set_new_dnode(&dn, inode, NULL, NULL, 0); 1336 ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); 1337 if (ret && ret != -ENOENT) { 1338 return ret; 1339 } else if (ret == -ENOENT) { 1340 if (dn.max_level == 0) 1341 return -ENOENT; 1342 done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - 1343 dn.ofs_in_node, len); 1344 blkaddr += done; 1345 do_replace += done; 1346 goto next; 1347 } 1348 1349 done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) - 1350 dn.ofs_in_node, len); 1351 for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { 1352 *blkaddr = f2fs_data_blkaddr(&dn); 1353 1354 if (__is_valid_data_blkaddr(*blkaddr) && 1355 !f2fs_is_valid_blkaddr(sbi, *blkaddr, 1356 DATA_GENERIC_ENHANCE)) { 1357 f2fs_put_dnode(&dn); 1358 return -EFSCORRUPTED; 1359 } 1360 1361 if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { 1362 1363 if (f2fs_lfs_mode(sbi)) { 1364 f2fs_put_dnode(&dn); 1365 return -EOPNOTSUPP; 1366 } 1367 1368 /* do not invalidate this block address */ 1369 f2fs_update_data_blkaddr(&dn, NULL_ADDR); 1370 *do_replace = 1; 1371 } 1372 } 1373 f2fs_put_dnode(&dn); 1374 next: 1375 len -= done; 1376 off += done; 1377 if (len) 1378 goto next_dnode; 1379 return 0; 1380 } 1381 1382 static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, 1383 int *do_replace, pgoff_t off, int len) 1384 { 1385 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1386 struct dnode_of_data dn; 
1387 int ret, i; 1388 1389 for (i = 0; i < len; i++, do_replace++, blkaddr++) { 1390 if (*do_replace == 0) 1391 continue; 1392 1393 set_new_dnode(&dn, inode, NULL, NULL, 0); 1394 ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); 1395 if (ret) { 1396 dec_valid_block_count(sbi, inode, 1); 1397 f2fs_invalidate_blocks(sbi, *blkaddr, 1); 1398 } else { 1399 f2fs_update_data_blkaddr(&dn, *blkaddr); 1400 } 1401 f2fs_put_dnode(&dn); 1402 } 1403 return 0; 1404 } 1405 1406 static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, 1407 block_t *blkaddr, int *do_replace, 1408 pgoff_t src, pgoff_t dst, pgoff_t len, bool full) 1409 { 1410 struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode); 1411 pgoff_t i = 0; 1412 int ret; 1413 1414 while (i < len) { 1415 if (blkaddr[i] == NULL_ADDR && !full) { 1416 i++; 1417 continue; 1418 } 1419 1420 if (do_replace[i] || blkaddr[i] == NULL_ADDR) { 1421 struct dnode_of_data dn; 1422 struct node_info ni; 1423 size_t new_size; 1424 pgoff_t ilen; 1425 1426 set_new_dnode(&dn, dst_inode, NULL, NULL, 0); 1427 ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE); 1428 if (ret) 1429 return ret; 1430 1431 ret = f2fs_get_node_info(sbi, dn.nid, &ni, false); 1432 if (ret) { 1433 f2fs_put_dnode(&dn); 1434 return ret; 1435 } 1436 1437 ilen = min((pgoff_t) 1438 ADDRS_PER_PAGE(dn.node_folio, dst_inode) - 1439 dn.ofs_in_node, len - i); 1440 do { 1441 dn.data_blkaddr = f2fs_data_blkaddr(&dn); 1442 f2fs_truncate_data_blocks_range(&dn, 1); 1443 1444 if (do_replace[i]) { 1445 f2fs_i_blocks_write(src_inode, 1446 1, false, false); 1447 f2fs_i_blocks_write(dst_inode, 1448 1, true, false); 1449 f2fs_replace_block(sbi, &dn, dn.data_blkaddr, 1450 blkaddr[i], ni.version, true, false); 1451 1452 do_replace[i] = 0; 1453 } 1454 dn.ofs_in_node++; 1455 i++; 1456 new_size = (loff_t)(dst + i) << PAGE_SHIFT; 1457 if (dst_inode->i_size < new_size) 1458 f2fs_i_size_write(dst_inode, new_size); 1459 } while (--ilen && (do_replace[i] || blkaddr[i] == 
NULL_ADDR)); 1460 1461 f2fs_put_dnode(&dn); 1462 } else { 1463 struct folio *fsrc, *fdst; 1464 1465 fsrc = f2fs_get_lock_data_folio(src_inode, 1466 src + i, true); 1467 if (IS_ERR(fsrc)) 1468 return PTR_ERR(fsrc); 1469 fdst = f2fs_get_new_data_folio(dst_inode, NULL, dst + i, 1470 true); 1471 if (IS_ERR(fdst)) { 1472 f2fs_folio_put(fsrc, true); 1473 return PTR_ERR(fdst); 1474 } 1475 1476 f2fs_folio_wait_writeback(fdst, DATA, true, true); 1477 1478 memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE); 1479 folio_mark_dirty(fdst); 1480 folio_set_f2fs_gcing(fdst); 1481 f2fs_folio_put(fdst, true); 1482 f2fs_folio_put(fsrc, true); 1483 1484 ret = f2fs_truncate_hole(src_inode, 1485 src + i, src + i + 1); 1486 if (ret) 1487 return ret; 1488 i++; 1489 } 1490 } 1491 return 0; 1492 } 1493 1494 static int __exchange_data_block(struct inode *src_inode, 1495 struct inode *dst_inode, pgoff_t src, pgoff_t dst, 1496 pgoff_t len, bool full) 1497 { 1498 block_t *src_blkaddr; 1499 int *do_replace; 1500 pgoff_t olen; 1501 int ret; 1502 1503 while (len) { 1504 olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len); 1505 1506 src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), 1507 array_size(olen, sizeof(block_t)), 1508 GFP_NOFS); 1509 if (!src_blkaddr) 1510 return -ENOMEM; 1511 1512 do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), 1513 array_size(olen, sizeof(int)), 1514 GFP_NOFS); 1515 if (!do_replace) { 1516 kvfree(src_blkaddr); 1517 return -ENOMEM; 1518 } 1519 1520 ret = __read_out_blkaddrs(src_inode, src_blkaddr, 1521 do_replace, src, olen); 1522 if (ret) 1523 goto roll_back; 1524 1525 ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr, 1526 do_replace, src, dst, olen, full); 1527 if (ret) 1528 goto roll_back; 1529 1530 src += olen; 1531 dst += olen; 1532 len -= olen; 1533 1534 kvfree(src_blkaddr); 1535 kvfree(do_replace); 1536 } 1537 return 0; 1538 1539 roll_back: 1540 __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen); 1541 kvfree(src_blkaddr); 1542 
kvfree(do_replace); 1543 return ret; 1544 } 1545 1546 static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) 1547 { 1548 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1549 pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 1550 pgoff_t start = offset >> PAGE_SHIFT; 1551 pgoff_t end = (offset + len) >> PAGE_SHIFT; 1552 int ret; 1553 1554 f2fs_balance_fs(sbi, true); 1555 1556 /* avoid gc operation during block exchange */ 1557 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1558 filemap_invalidate_lock(inode->i_mapping); 1559 1560 f2fs_zero_post_eof_page(inode, offset + len, false); 1561 1562 f2fs_lock_op(sbi); 1563 f2fs_drop_extent_tree(inode); 1564 truncate_pagecache(inode, offset); 1565 ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); 1566 f2fs_unlock_op(sbi); 1567 1568 filemap_invalidate_unlock(inode->i_mapping); 1569 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1570 return ret; 1571 } 1572 1573 static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) 1574 { 1575 loff_t new_size; 1576 int ret; 1577 1578 if (offset + len >= i_size_read(inode)) 1579 return -EINVAL; 1580 1581 /* collapse range should be aligned to block size of f2fs. 
*/ 1582 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) 1583 return -EINVAL; 1584 1585 ret = f2fs_convert_inline_inode(inode); 1586 if (ret) 1587 return ret; 1588 1589 /* write out all dirty pages from offset */ 1590 ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); 1591 if (ret) 1592 return ret; 1593 1594 ret = f2fs_do_collapse(inode, offset, len); 1595 if (ret) 1596 return ret; 1597 1598 /* write out all moved pages, if possible */ 1599 filemap_invalidate_lock(inode->i_mapping); 1600 filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); 1601 truncate_pagecache(inode, offset); 1602 1603 new_size = i_size_read(inode) - len; 1604 ret = f2fs_truncate_blocks(inode, new_size, true); 1605 filemap_invalidate_unlock(inode->i_mapping); 1606 if (!ret) 1607 f2fs_i_size_write(inode, new_size); 1608 return ret; 1609 } 1610 1611 static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, 1612 pgoff_t end) 1613 { 1614 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 1615 pgoff_t index = start; 1616 unsigned int ofs_in_node = dn->ofs_in_node; 1617 blkcnt_t count = 0; 1618 int ret; 1619 1620 for (; index < end; index++, dn->ofs_in_node++) { 1621 if (f2fs_data_blkaddr(dn) == NULL_ADDR) 1622 count++; 1623 } 1624 1625 dn->ofs_in_node = ofs_in_node; 1626 ret = f2fs_reserve_new_blocks(dn, count); 1627 if (ret) 1628 return ret; 1629 1630 dn->ofs_in_node = ofs_in_node; 1631 for (index = start; index < end; index++, dn->ofs_in_node++) { 1632 dn->data_blkaddr = f2fs_data_blkaddr(dn); 1633 /* 1634 * f2fs_reserve_new_blocks will not guarantee entire block 1635 * allocation. 
1636 */ 1637 if (dn->data_blkaddr == NULL_ADDR) { 1638 ret = -ENOSPC; 1639 break; 1640 } 1641 1642 if (dn->data_blkaddr == NEW_ADDR) 1643 continue; 1644 1645 if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr, 1646 DATA_GENERIC_ENHANCE)) { 1647 ret = -EFSCORRUPTED; 1648 break; 1649 } 1650 1651 f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1); 1652 f2fs_set_data_blkaddr(dn, NEW_ADDR); 1653 } 1654 1655 if (index > start) { 1656 f2fs_update_read_extent_cache_range(dn, start, 0, 1657 index - start); 1658 f2fs_update_age_extent_cache_range(dn, start, index - start); 1659 } 1660 1661 return ret; 1662 } 1663 1664 static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, 1665 int mode) 1666 { 1667 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1668 struct address_space *mapping = inode->i_mapping; 1669 pgoff_t index, pg_start, pg_end; 1670 loff_t new_size = i_size_read(inode); 1671 loff_t off_start, off_end; 1672 int ret = 0; 1673 1674 ret = inode_newsize_ok(inode, (len + offset)); 1675 if (ret) 1676 return ret; 1677 1678 ret = f2fs_convert_inline_inode(inode); 1679 if (ret) 1680 return ret; 1681 1682 ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); 1683 if (ret) 1684 return ret; 1685 1686 f2fs_zero_post_eof_page(inode, offset + len, true); 1687 1688 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; 1689 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; 1690 1691 off_start = offset & (PAGE_SIZE - 1); 1692 off_end = (offset + len) & (PAGE_SIZE - 1); 1693 1694 if (pg_start == pg_end) { 1695 ret = fill_zero(inode, pg_start, off_start, 1696 off_end - off_start); 1697 if (ret) 1698 return ret; 1699 1700 new_size = max_t(loff_t, new_size, offset + len); 1701 } else { 1702 if (off_start) { 1703 ret = fill_zero(inode, pg_start++, off_start, 1704 PAGE_SIZE - off_start); 1705 if (ret) 1706 return ret; 1707 1708 new_size = max_t(loff_t, new_size, 1709 (loff_t)pg_start << PAGE_SHIFT); 1710 } 1711 1712 for (index = pg_start; index < 
pg_end;) { 1713 struct dnode_of_data dn; 1714 unsigned int end_offset; 1715 pgoff_t end; 1716 1717 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1718 filemap_invalidate_lock(mapping); 1719 1720 truncate_pagecache_range(inode, 1721 (loff_t)index << PAGE_SHIFT, 1722 ((loff_t)pg_end << PAGE_SHIFT) - 1); 1723 1724 f2fs_lock_op(sbi); 1725 1726 set_new_dnode(&dn, inode, NULL, NULL, 0); 1727 ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); 1728 if (ret) { 1729 f2fs_unlock_op(sbi); 1730 filemap_invalidate_unlock(mapping); 1731 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1732 goto out; 1733 } 1734 1735 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 1736 end = min(pg_end, end_offset - dn.ofs_in_node + index); 1737 1738 ret = f2fs_do_zero_range(&dn, index, end); 1739 f2fs_put_dnode(&dn); 1740 1741 f2fs_unlock_op(sbi); 1742 filemap_invalidate_unlock(mapping); 1743 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1744 1745 f2fs_balance_fs(sbi, dn.node_changed); 1746 1747 if (ret) 1748 goto out; 1749 1750 index = end; 1751 new_size = max_t(loff_t, new_size, 1752 (loff_t)index << PAGE_SHIFT); 1753 } 1754 1755 if (off_end) { 1756 ret = fill_zero(inode, pg_end, 0, off_end); 1757 if (ret) 1758 goto out; 1759 1760 new_size = max_t(loff_t, new_size, offset + len); 1761 } 1762 } 1763 1764 out: 1765 if (new_size > i_size_read(inode)) { 1766 if (mode & FALLOC_FL_KEEP_SIZE) 1767 file_set_keep_isize(inode); 1768 else 1769 f2fs_i_size_write(inode, new_size); 1770 } 1771 return ret; 1772 } 1773 1774 static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) 1775 { 1776 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1777 struct address_space *mapping = inode->i_mapping; 1778 pgoff_t nr, pg_start, pg_end, delta, idx; 1779 loff_t new_size; 1780 int ret = 0; 1781 1782 new_size = i_size_read(inode) + len; 1783 ret = inode_newsize_ok(inode, new_size); 1784 if (ret) 1785 return ret; 1786 1787 if (offset >= i_size_read(inode)) 1788 return -EINVAL; 1789 1790 /* 
insert range should be aligned to block size of f2fs. */ 1791 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) 1792 return -EINVAL; 1793 1794 ret = f2fs_convert_inline_inode(inode); 1795 if (ret) 1796 return ret; 1797 1798 f2fs_balance_fs(sbi, true); 1799 1800 filemap_invalidate_lock(mapping); 1801 ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); 1802 filemap_invalidate_unlock(mapping); 1803 if (ret) 1804 return ret; 1805 1806 /* write out all dirty pages from offset */ 1807 ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); 1808 if (ret) 1809 return ret; 1810 1811 pg_start = offset >> PAGE_SHIFT; 1812 pg_end = (offset + len) >> PAGE_SHIFT; 1813 delta = pg_end - pg_start; 1814 idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 1815 1816 /* avoid gc operation during block exchange */ 1817 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1818 filemap_invalidate_lock(mapping); 1819 1820 f2fs_zero_post_eof_page(inode, offset + len, false); 1821 truncate_pagecache(inode, offset); 1822 1823 while (!ret && idx > pg_start) { 1824 nr = idx - pg_start; 1825 if (nr > delta) 1826 nr = delta; 1827 idx -= nr; 1828 1829 f2fs_lock_op(sbi); 1830 f2fs_drop_extent_tree(inode); 1831 1832 ret = __exchange_data_block(inode, inode, idx, 1833 idx + delta, nr, false); 1834 f2fs_unlock_op(sbi); 1835 } 1836 filemap_invalidate_unlock(mapping); 1837 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1838 if (ret) 1839 return ret; 1840 1841 /* write out all moved pages, if possible */ 1842 filemap_invalidate_lock(mapping); 1843 ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); 1844 truncate_pagecache(inode, offset); 1845 filemap_invalidate_unlock(mapping); 1846 1847 if (!ret) 1848 f2fs_i_size_write(inode, new_size); 1849 return ret; 1850 } 1851 1852 static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, 1853 loff_t len, int mode) 1854 { 1855 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1856 struct f2fs_map_blocks map = { 
.m_next_pgofs = NULL, 1857 .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, 1858 .m_may_create = true }; 1859 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, 1860 .init_gc_type = FG_GC, 1861 .should_migrate_blocks = false, 1862 .err_gc_skipped = true, 1863 .nr_free_secs = 0 }; 1864 pgoff_t pg_start, pg_end; 1865 loff_t new_size; 1866 loff_t off_end; 1867 block_t expanded = 0; 1868 int err; 1869 1870 err = inode_newsize_ok(inode, (len + offset)); 1871 if (err) 1872 return err; 1873 1874 err = f2fs_convert_inline_inode(inode); 1875 if (err) 1876 return err; 1877 1878 f2fs_zero_post_eof_page(inode, offset + len, true); 1879 1880 f2fs_balance_fs(sbi, true); 1881 1882 pg_start = ((unsigned long long)offset) >> PAGE_SHIFT; 1883 pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT; 1884 off_end = (offset + len) & (PAGE_SIZE - 1); 1885 1886 map.m_lblk = pg_start; 1887 map.m_len = pg_end - pg_start; 1888 if (off_end) 1889 map.m_len++; 1890 1891 if (!map.m_len) 1892 return 0; 1893 1894 if (f2fs_is_pinned_file(inode)) { 1895 block_t sec_blks = CAP_BLKS_PER_SEC(sbi); 1896 block_t sec_len = roundup(map.m_len, sec_blks); 1897 1898 map.m_len = sec_blks; 1899 next_alloc: 1900 f2fs_down_write(&sbi->pin_sem); 1901 1902 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 1903 if (has_not_enough_free_secs(sbi, 0, 0)) { 1904 f2fs_up_write(&sbi->pin_sem); 1905 err = -ENOSPC; 1906 f2fs_warn_ratelimited(sbi, 1907 "ino:%lu, start:%lu, end:%lu, need to trigger GC to " 1908 "reclaim enough free segment when checkpoint is enabled", 1909 inode->i_ino, pg_start, pg_end); 1910 goto out_err; 1911 } 1912 } 1913 1914 if (has_not_enough_free_secs(sbi, 0, 1915 sbi->reserved_pin_section)) { 1916 f2fs_down_write(&sbi->gc_lock); 1917 stat_inc_gc_call_count(sbi, FOREGROUND); 1918 err = f2fs_gc(sbi, &gc_control); 1919 if (err && err != -ENODATA) { 1920 f2fs_up_write(&sbi->pin_sem); 1921 goto out_err; 1922 } 1923 } 1924 1925 err = f2fs_allocate_pinning_section(sbi); 1926 if 
(err) { 1927 f2fs_up_write(&sbi->pin_sem); 1928 goto out_err; 1929 } 1930 1931 map.m_seg_type = CURSEG_COLD_DATA_PINNED; 1932 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); 1933 file_dont_truncate(inode); 1934 1935 f2fs_up_write(&sbi->pin_sem); 1936 1937 expanded += map.m_len; 1938 sec_len -= map.m_len; 1939 map.m_lblk += map.m_len; 1940 if (!err && sec_len) 1941 goto next_alloc; 1942 1943 map.m_len = expanded; 1944 } else { 1945 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO); 1946 expanded = map.m_len; 1947 } 1948 out_err: 1949 if (err) { 1950 pgoff_t last_off; 1951 1952 if (!expanded) 1953 return err; 1954 1955 last_off = pg_start + expanded - 1; 1956 1957 /* update new size to the failed position */ 1958 new_size = (last_off == pg_end) ? offset + len : 1959 (loff_t)(last_off + 1) << PAGE_SHIFT; 1960 } else { 1961 new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; 1962 } 1963 1964 if (new_size > i_size_read(inode)) { 1965 if (mode & FALLOC_FL_KEEP_SIZE) 1966 file_set_keep_isize(inode); 1967 else 1968 f2fs_i_size_write(inode, new_size); 1969 } 1970 1971 return err; 1972 } 1973 1974 static long f2fs_fallocate(struct file *file, int mode, 1975 loff_t offset, loff_t len) 1976 { 1977 struct inode *inode = file_inode(file); 1978 long ret = 0; 1979 1980 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 1981 return -EIO; 1982 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) 1983 return -ENOSPC; 1984 if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) 1985 return -EOPNOTSUPP; 1986 1987 /* f2fs only support ->fallocate for regular file */ 1988 if (!S_ISREG(inode->i_mode)) 1989 return -EINVAL; 1990 1991 if (IS_ENCRYPTED(inode) && 1992 (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) 1993 return -EOPNOTSUPP; 1994 1995 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 1996 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | 1997 FALLOC_FL_INSERT_RANGE)) 1998 return -EOPNOTSUPP; 1999 2000 inode_lock(inode); 2001 
2002 /* 2003 * Pinned file should not support partial truncation since the block 2004 * can be used by applications. 2005 */ 2006 if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && 2007 (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | 2008 FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) { 2009 ret = -EOPNOTSUPP; 2010 goto out; 2011 } 2012 2013 ret = file_modified(file); 2014 if (ret) 2015 goto out; 2016 2017 /* 2018 * wait for inflight dio, blocks should be removed after IO 2019 * completion. 2020 */ 2021 inode_dio_wait(inode); 2022 2023 if (mode & FALLOC_FL_PUNCH_HOLE) { 2024 if (offset >= inode->i_size) 2025 goto out; 2026 2027 ret = f2fs_punch_hole(inode, offset, len); 2028 } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { 2029 ret = f2fs_collapse_range(inode, offset, len); 2030 } else if (mode & FALLOC_FL_ZERO_RANGE) { 2031 ret = f2fs_zero_range(inode, offset, len, mode); 2032 } else if (mode & FALLOC_FL_INSERT_RANGE) { 2033 ret = f2fs_insert_range(inode, offset, len); 2034 } else { 2035 ret = f2fs_expand_inode_data(inode, offset, len, mode); 2036 } 2037 2038 if (!ret) { 2039 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 2040 f2fs_mark_inode_dirty_sync(inode, false); 2041 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2042 } 2043 2044 out: 2045 inode_unlock(inode); 2046 2047 trace_f2fs_fallocate(inode, mode, offset, len, ret); 2048 return ret; 2049 } 2050 2051 static int f2fs_release_file(struct inode *inode, struct file *filp) 2052 { 2053 if (atomic_dec_and_test(&F2FS_I(inode)->open_count)) 2054 f2fs_remove_donate_inode(inode); 2055 2056 /* 2057 * f2fs_release_file is called at every close calls. So we should 2058 * not drop any inmemory pages by close called by other process. 
2059 */ 2060 if (!(filp->f_mode & FMODE_WRITE) || 2061 atomic_read(&inode->i_writecount) != 1) 2062 return 0; 2063 2064 inode_lock(inode); 2065 f2fs_abort_atomic_write(inode, true); 2066 inode_unlock(inode); 2067 2068 return 0; 2069 } 2070 2071 static int f2fs_file_flush(struct file *file, fl_owner_t id) 2072 { 2073 struct inode *inode = file_inode(file); 2074 2075 /* 2076 * If the process doing a transaction is crashed, we should do 2077 * roll-back. Otherwise, other reader/write can see corrupted database 2078 * until all the writers close its file. Since this should be done 2079 * before dropping file lock, it needs to do in ->flush. 2080 */ 2081 if (F2FS_I(inode)->atomic_write_task == current && 2082 (current->flags & PF_EXITING)) { 2083 inode_lock(inode); 2084 f2fs_abort_atomic_write(inode, true); 2085 inode_unlock(inode); 2086 } 2087 2088 return 0; 2089 } 2090 2091 static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) 2092 { 2093 struct f2fs_inode_info *fi = F2FS_I(inode); 2094 u32 masked_flags = fi->i_flags & mask; 2095 2096 /* mask can be shrunk by flags_valid selector */ 2097 iflags &= mask; 2098 2099 /* Is it quota file? 
Do not allow user to mess with it */ 2100 if (IS_NOQUOTA(inode)) 2101 return -EPERM; 2102 2103 if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) { 2104 if (!f2fs_sb_has_casefold(F2FS_I_SB(inode))) 2105 return -EOPNOTSUPP; 2106 if (!f2fs_empty_dir(inode)) 2107 return -ENOTEMPTY; 2108 } 2109 2110 if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) { 2111 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 2112 return -EOPNOTSUPP; 2113 if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL)) 2114 return -EINVAL; 2115 } 2116 2117 if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { 2118 if (masked_flags & F2FS_COMPR_FL) { 2119 if (!f2fs_disable_compressed_file(inode)) 2120 return -EINVAL; 2121 } else { 2122 /* try to convert inline_data to support compression */ 2123 int err = f2fs_convert_inline_inode(inode); 2124 if (err) 2125 return err; 2126 2127 f2fs_down_write(&fi->i_sem); 2128 if (!f2fs_may_compress(inode) || 2129 atomic_read(&fi->writeback) || 2130 (S_ISREG(inode->i_mode) && 2131 F2FS_HAS_BLOCKS(inode))) { 2132 f2fs_up_write(&fi->i_sem); 2133 return -EINVAL; 2134 } 2135 err = set_compress_context(inode); 2136 f2fs_up_write(&fi->i_sem); 2137 2138 if (err) 2139 return err; 2140 } 2141 } 2142 2143 fi->i_flags = iflags | (fi->i_flags & ~mask); 2144 f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) && 2145 (fi->i_flags & F2FS_NOCOMP_FL)); 2146 2147 if (fi->i_flags & F2FS_PROJINHERIT_FL) 2148 set_inode_flag(inode, FI_PROJ_INHERIT); 2149 else 2150 clear_inode_flag(inode, FI_PROJ_INHERIT); 2151 2152 inode_set_ctime_current(inode); 2153 f2fs_set_inode_flags(inode); 2154 f2fs_mark_inode_dirty_sync(inode, true); 2155 return 0; 2156 } 2157 2158 /* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */ 2159 2160 /* 2161 * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry 2162 * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to 2163 * F2FS_GETTABLE_FS_FL. 
To also make it settable via FS_IOC_SETFLAGS, also add 2164 * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL. 2165 * 2166 * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and 2167 * FS_IOC_FSSETXATTR is done by the VFS. 2168 */ 2169 2170 static const struct { 2171 u32 iflag; 2172 u32 fsflag; 2173 } f2fs_fsflags_map[] = { 2174 { F2FS_COMPR_FL, FS_COMPR_FL }, 2175 { F2FS_SYNC_FL, FS_SYNC_FL }, 2176 { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, 2177 { F2FS_APPEND_FL, FS_APPEND_FL }, 2178 { F2FS_NODUMP_FL, FS_NODUMP_FL }, 2179 { F2FS_NOATIME_FL, FS_NOATIME_FL }, 2180 { F2FS_NOCOMP_FL, FS_NOCOMP_FL }, 2181 { F2FS_INDEX_FL, FS_INDEX_FL }, 2182 { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, 2183 { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, 2184 { F2FS_CASEFOLD_FL, FS_CASEFOLD_FL }, 2185 }; 2186 2187 #define F2FS_GETTABLE_FS_FL ( \ 2188 FS_COMPR_FL | \ 2189 FS_SYNC_FL | \ 2190 FS_IMMUTABLE_FL | \ 2191 FS_APPEND_FL | \ 2192 FS_NODUMP_FL | \ 2193 FS_NOATIME_FL | \ 2194 FS_NOCOMP_FL | \ 2195 FS_INDEX_FL | \ 2196 FS_DIRSYNC_FL | \ 2197 FS_PROJINHERIT_FL | \ 2198 FS_ENCRYPT_FL | \ 2199 FS_INLINE_DATA_FL | \ 2200 FS_NOCOW_FL | \ 2201 FS_VERITY_FL | \ 2202 FS_CASEFOLD_FL) 2203 2204 #define F2FS_SETTABLE_FS_FL ( \ 2205 FS_COMPR_FL | \ 2206 FS_SYNC_FL | \ 2207 FS_IMMUTABLE_FL | \ 2208 FS_APPEND_FL | \ 2209 FS_NODUMP_FL | \ 2210 FS_NOATIME_FL | \ 2211 FS_NOCOMP_FL | \ 2212 FS_DIRSYNC_FL | \ 2213 FS_PROJINHERIT_FL | \ 2214 FS_CASEFOLD_FL) 2215 2216 /* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */ 2217 static inline u32 f2fs_iflags_to_fsflags(u32 iflags) 2218 { 2219 u32 fsflags = 0; 2220 int i; 2221 2222 for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) 2223 if (iflags & f2fs_fsflags_map[i].iflag) 2224 fsflags |= f2fs_fsflags_map[i].fsflag; 2225 2226 return fsflags; 2227 } 2228 2229 /* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */ 2230 static inline u32 f2fs_fsflags_to_iflags(u32 fsflags) 2231 { 2232 u32 iflags = 0; 2233 int i; 2234 2235 for (i = 0; i < 
ARRAY_SIZE(f2fs_fsflags_map); i++) 2236 if (fsflags & f2fs_fsflags_map[i].fsflag) 2237 iflags |= f2fs_fsflags_map[i].iflag; 2238 2239 return iflags; 2240 } 2241 2242 static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) 2243 { 2244 struct inode *inode = file_inode(filp); 2245 2246 return put_user(inode->i_generation, (int __user *)arg); 2247 } 2248 2249 static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) 2250 { 2251 struct inode *inode = file_inode(filp); 2252 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2253 struct f2fs_inode_info *fi = F2FS_I(inode); 2254 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2255 loff_t isize; 2256 int ret; 2257 2258 if (!(filp->f_mode & FMODE_WRITE)) 2259 return -EBADF; 2260 2261 if (!inode_owner_or_capable(idmap, inode)) 2262 return -EACCES; 2263 2264 if (!S_ISREG(inode->i_mode)) 2265 return -EINVAL; 2266 2267 if (filp->f_flags & O_DIRECT) 2268 return -EINVAL; 2269 2270 ret = mnt_want_write_file(filp); 2271 if (ret) 2272 return ret; 2273 2274 inode_lock(inode); 2275 2276 if (!f2fs_disable_compressed_file(inode) || 2277 f2fs_is_pinned_file(inode)) { 2278 ret = -EINVAL; 2279 goto out; 2280 } 2281 2282 if (f2fs_is_atomic_file(inode)) 2283 goto out; 2284 2285 ret = f2fs_convert_inline_inode(inode); 2286 if (ret) 2287 goto out; 2288 2289 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 2290 f2fs_down_write(&fi->i_gc_rwsem[READ]); 2291 2292 /* 2293 * Should wait end_io to count F2FS_WB_CP_DATA correctly by 2294 * f2fs_is_atomic_file. 
2295 */ 2296 if (get_dirty_pages(inode)) 2297 f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u", 2298 inode->i_ino, get_dirty_pages(inode)); 2299 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 2300 if (ret) 2301 goto out_unlock; 2302 2303 /* Check if the inode already has a COW inode */ 2304 if (fi->cow_inode == NULL) { 2305 /* Create a COW inode for atomic write */ 2306 struct dentry *dentry = file_dentry(filp); 2307 struct inode *dir = d_inode(dentry->d_parent); 2308 2309 ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode); 2310 if (ret) 2311 goto out_unlock; 2312 2313 set_inode_flag(fi->cow_inode, FI_COW_FILE); 2314 clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); 2315 2316 /* Set the COW inode's atomic_inode to the atomic inode */ 2317 F2FS_I(fi->cow_inode)->atomic_inode = inode; 2318 } else { 2319 /* Reuse the already created COW inode */ 2320 f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode)); 2321 2322 invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1); 2323 2324 ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); 2325 if (ret) 2326 goto out_unlock; 2327 } 2328 2329 f2fs_write_inode(inode, NULL); 2330 2331 stat_inc_atomic_inode(inode); 2332 2333 set_inode_flag(inode, FI_ATOMIC_FILE); 2334 2335 isize = i_size_read(inode); 2336 fi->original_i_size = isize; 2337 if (truncate) { 2338 set_inode_flag(inode, FI_ATOMIC_REPLACE); 2339 truncate_inode_pages_final(inode->i_mapping); 2340 f2fs_i_size_write(inode, 0); 2341 isize = 0; 2342 } 2343 f2fs_i_size_write(fi->cow_inode, isize); 2344 2345 out_unlock: 2346 f2fs_up_write(&fi->i_gc_rwsem[READ]); 2347 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 2348 if (ret) 2349 goto out; 2350 2351 f2fs_update_time(sbi, REQ_TIME); 2352 fi->atomic_write_task = current; 2353 stat_update_max_atomic_write(inode); 2354 fi->atomic_write_cnt = 0; 2355 out: 2356 inode_unlock(inode); 2357 mnt_drop_write_file(filp); 2358 return ret; 2359 } 2360 2361 static int f2fs_ioc_commit_atomic_write(struct file 
*filp) 2362 { 2363 struct inode *inode = file_inode(filp); 2364 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2365 int ret; 2366 2367 if (!(filp->f_mode & FMODE_WRITE)) 2368 return -EBADF; 2369 2370 if (!inode_owner_or_capable(idmap, inode)) 2371 return -EACCES; 2372 2373 ret = mnt_want_write_file(filp); 2374 if (ret) 2375 return ret; 2376 2377 f2fs_balance_fs(F2FS_I_SB(inode), true); 2378 2379 inode_lock(inode); 2380 2381 if (f2fs_is_atomic_file(inode)) { 2382 ret = f2fs_commit_atomic_write(inode); 2383 if (!ret) 2384 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); 2385 2386 f2fs_abort_atomic_write(inode, ret); 2387 } else { 2388 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); 2389 } 2390 2391 inode_unlock(inode); 2392 mnt_drop_write_file(filp); 2393 return ret; 2394 } 2395 2396 static int f2fs_ioc_abort_atomic_write(struct file *filp) 2397 { 2398 struct inode *inode = file_inode(filp); 2399 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2400 int ret; 2401 2402 if (!(filp->f_mode & FMODE_WRITE)) 2403 return -EBADF; 2404 2405 if (!inode_owner_or_capable(idmap, inode)) 2406 return -EACCES; 2407 2408 ret = mnt_want_write_file(filp); 2409 if (ret) 2410 return ret; 2411 2412 inode_lock(inode); 2413 2414 f2fs_abort_atomic_write(inode, true); 2415 2416 inode_unlock(inode); 2417 2418 mnt_drop_write_file(filp); 2419 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2420 return ret; 2421 } 2422 2423 int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, 2424 bool readonly, bool need_lock) 2425 { 2426 struct super_block *sb = sbi->sb; 2427 int ret = 0; 2428 2429 switch (flag) { 2430 case F2FS_GOING_DOWN_FULLSYNC: 2431 ret = bdev_freeze(sb->s_bdev); 2432 if (ret) 2433 goto out; 2434 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2435 bdev_thaw(sb->s_bdev); 2436 break; 2437 case F2FS_GOING_DOWN_METASYNC: 2438 /* do checkpoint only */ 2439 ret = f2fs_sync_fs(sb, 1); 2440 if (ret) { 2441 if (ret == -EIO) 2442 ret = 0; 2443 goto out; 2444 } 2445 
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2446 break; 2447 case F2FS_GOING_DOWN_NOSYNC: 2448 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2449 break; 2450 case F2FS_GOING_DOWN_METAFLUSH: 2451 f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); 2452 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2453 break; 2454 case F2FS_GOING_DOWN_NEED_FSCK: 2455 set_sbi_flag(sbi, SBI_NEED_FSCK); 2456 set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); 2457 set_sbi_flag(sbi, SBI_IS_DIRTY); 2458 /* do checkpoint only */ 2459 ret = f2fs_sync_fs(sb, 1); 2460 if (ret == -EIO) 2461 ret = 0; 2462 goto out; 2463 default: 2464 ret = -EINVAL; 2465 goto out; 2466 } 2467 2468 if (readonly) 2469 goto out; 2470 2471 /* 2472 * grab sb->s_umount to avoid racing w/ remount() and other shutdown 2473 * paths. 2474 */ 2475 if (need_lock) 2476 down_write(&sbi->sb->s_umount); 2477 2478 f2fs_stop_gc_thread(sbi); 2479 f2fs_stop_discard_thread(sbi); 2480 2481 f2fs_drop_discard_cmd(sbi); 2482 clear_opt(sbi, DISCARD); 2483 2484 if (need_lock) 2485 up_write(&sbi->sb->s_umount); 2486 2487 f2fs_update_time(sbi, REQ_TIME); 2488 out: 2489 2490 trace_f2fs_shutdown(sbi, flag, ret); 2491 2492 return ret; 2493 } 2494 2495 static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) 2496 { 2497 struct inode *inode = file_inode(filp); 2498 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2499 __u32 in; 2500 int ret; 2501 bool need_drop = false, readonly = false; 2502 2503 if (!capable(CAP_SYS_ADMIN)) 2504 return -EPERM; 2505 2506 if (get_user(in, (__u32 __user *)arg)) 2507 return -EFAULT; 2508 2509 if (in != F2FS_GOING_DOWN_FULLSYNC) { 2510 ret = mnt_want_write_file(filp); 2511 if (ret) { 2512 if (ret != -EROFS) 2513 return ret; 2514 2515 /* fallback to nosync shutdown for readonly fs */ 2516 in = F2FS_GOING_DOWN_NOSYNC; 2517 readonly = true; 2518 } else { 2519 need_drop = true; 2520 } 2521 } 2522 2523 ret = f2fs_do_shutdown(sbi, in, readonly, true); 2524 2525 if (need_drop) 2526 
mnt_drop_write_file(filp); 2527 2528 return ret; 2529 } 2530 2531 static int f2fs_keep_noreuse_range(struct inode *inode, 2532 loff_t offset, loff_t len) 2533 { 2534 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2535 u64 max_bytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 2536 u64 start, end; 2537 int ret = 0; 2538 2539 if (!S_ISREG(inode->i_mode)) 2540 return 0; 2541 2542 if (offset >= max_bytes || len > max_bytes || 2543 (offset + len) > max_bytes) 2544 return 0; 2545 2546 start = offset >> PAGE_SHIFT; 2547 end = DIV_ROUND_UP(offset + len, PAGE_SIZE); 2548 2549 inode_lock(inode); 2550 if (f2fs_is_atomic_file(inode)) { 2551 inode_unlock(inode); 2552 return 0; 2553 } 2554 2555 spin_lock(&sbi->inode_lock[DONATE_INODE]); 2556 /* let's remove the range, if len = 0 */ 2557 if (!len) { 2558 if (!list_empty(&F2FS_I(inode)->gdonate_list)) { 2559 list_del_init(&F2FS_I(inode)->gdonate_list); 2560 sbi->donate_files--; 2561 if (is_inode_flag_set(inode, FI_DONATE_FINISHED)) 2562 ret = -EALREADY; 2563 else 2564 set_inode_flag(inode, FI_DONATE_FINISHED); 2565 } else 2566 ret = -ENOENT; 2567 } else { 2568 if (list_empty(&F2FS_I(inode)->gdonate_list)) { 2569 list_add_tail(&F2FS_I(inode)->gdonate_list, 2570 &sbi->inode_list[DONATE_INODE]); 2571 sbi->donate_files++; 2572 } else { 2573 list_move_tail(&F2FS_I(inode)->gdonate_list, 2574 &sbi->inode_list[DONATE_INODE]); 2575 } 2576 F2FS_I(inode)->donate_start = start; 2577 F2FS_I(inode)->donate_end = end - 1; 2578 clear_inode_flag(inode, FI_DONATE_FINISHED); 2579 } 2580 spin_unlock(&sbi->inode_lock[DONATE_INODE]); 2581 inode_unlock(inode); 2582 2583 return ret; 2584 } 2585 2586 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) 2587 { 2588 struct inode *inode = file_inode(filp); 2589 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2590 struct fstrim_range range; 2591 int ret; 2592 2593 if (!capable(CAP_SYS_ADMIN)) 2594 return -EPERM; 2595 2596 if (!f2fs_hw_support_discard(sbi)) 2597 return -EOPNOTSUPP; 2598 2599 if 
/*
 * Report whether any of the 16 bytes at @u is non-zero.
 *
 * Return: true as soon as a non-zero byte is found, false if all 16 bytes
 * are zero.
 */
static bool uuid_is_nonzero(__u8 u[16])
{
	const __u8 *cur = u;
	const __u8 *const stop = u + 16;

	while (cur < stop) {
		if (*cur++ != 0)
			return true;
	}
	return false;
}
false); 2674 if (err) { 2675 /* undo new data */ 2676 memset(sbi->raw_super->encrypt_pw_salt, 0, 16); 2677 goto out_err; 2678 } 2679 got_it: 2680 memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16); 2681 out_err: 2682 f2fs_up_write(&sbi->sb_lock); 2683 mnt_drop_write_file(filp); 2684 2685 if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16)) 2686 err = -EFAULT; 2687 2688 return err; 2689 } 2690 2691 static int f2fs_ioc_get_encryption_policy_ex(struct file *filp, 2692 unsigned long arg) 2693 { 2694 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2695 return -EOPNOTSUPP; 2696 2697 return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg); 2698 } 2699 2700 static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg) 2701 { 2702 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2703 return -EOPNOTSUPP; 2704 2705 return fscrypt_ioctl_add_key(filp, (void __user *)arg); 2706 } 2707 2708 static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg) 2709 { 2710 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2711 return -EOPNOTSUPP; 2712 2713 return fscrypt_ioctl_remove_key(filp, (void __user *)arg); 2714 } 2715 2716 static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp, 2717 unsigned long arg) 2718 { 2719 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2720 return -EOPNOTSUPP; 2721 2722 return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg); 2723 } 2724 2725 static int f2fs_ioc_get_encryption_key_status(struct file *filp, 2726 unsigned long arg) 2727 { 2728 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2729 return -EOPNOTSUPP; 2730 2731 return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); 2732 } 2733 2734 static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg) 2735 { 2736 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2737 return -EOPNOTSUPP; 2738 2739 return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); 
2740 } 2741 2742 static int f2fs_ioc_gc(struct file *filp, unsigned long arg) 2743 { 2744 struct inode *inode = file_inode(filp); 2745 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2746 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, 2747 .no_bg_gc = false, 2748 .should_migrate_blocks = false, 2749 .nr_free_secs = 0 }; 2750 __u32 sync; 2751 int ret; 2752 2753 if (!capable(CAP_SYS_ADMIN)) 2754 return -EPERM; 2755 2756 if (get_user(sync, (__u32 __user *)arg)) 2757 return -EFAULT; 2758 2759 if (f2fs_readonly(sbi->sb)) 2760 return -EROFS; 2761 2762 ret = mnt_want_write_file(filp); 2763 if (ret) 2764 return ret; 2765 2766 if (!sync) { 2767 if (!f2fs_down_write_trylock(&sbi->gc_lock)) { 2768 ret = -EBUSY; 2769 goto out; 2770 } 2771 } else { 2772 f2fs_down_write(&sbi->gc_lock); 2773 } 2774 2775 gc_control.init_gc_type = sync ? FG_GC : BG_GC; 2776 gc_control.err_gc_skipped = sync; 2777 stat_inc_gc_call_count(sbi, FOREGROUND); 2778 ret = f2fs_gc(sbi, &gc_control); 2779 out: 2780 mnt_drop_write_file(filp); 2781 return ret; 2782 } 2783 2784 static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range) 2785 { 2786 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); 2787 struct f2fs_gc_control gc_control = { 2788 .init_gc_type = range->sync ? 
FG_GC : BG_GC, 2789 .no_bg_gc = false, 2790 .should_migrate_blocks = false, 2791 .err_gc_skipped = range->sync, 2792 .nr_free_secs = 0 }; 2793 u64 end; 2794 int ret; 2795 2796 if (!capable(CAP_SYS_ADMIN)) 2797 return -EPERM; 2798 if (f2fs_readonly(sbi->sb)) 2799 return -EROFS; 2800 2801 end = range->start + range->len; 2802 if (end < range->start || range->start < MAIN_BLKADDR(sbi) || 2803 end >= MAX_BLKADDR(sbi)) 2804 return -EINVAL; 2805 2806 ret = mnt_want_write_file(filp); 2807 if (ret) 2808 return ret; 2809 2810 do_more: 2811 if (!range->sync) { 2812 if (!f2fs_down_write_trylock(&sbi->gc_lock)) { 2813 ret = -EBUSY; 2814 goto out; 2815 } 2816 } else { 2817 f2fs_down_write(&sbi->gc_lock); 2818 } 2819 2820 gc_control.victim_segno = GET_SEGNO(sbi, range->start); 2821 stat_inc_gc_call_count(sbi, FOREGROUND); 2822 ret = f2fs_gc(sbi, &gc_control); 2823 if (ret) { 2824 if (ret == -EBUSY) 2825 ret = -EAGAIN; 2826 goto out; 2827 } 2828 range->start += CAP_BLKS_PER_SEC(sbi); 2829 if (range->start <= end) 2830 goto do_more; 2831 out: 2832 mnt_drop_write_file(filp); 2833 return ret; 2834 } 2835 2836 static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) 2837 { 2838 struct f2fs_gc_range range; 2839 2840 if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, 2841 sizeof(range))) 2842 return -EFAULT; 2843 return __f2fs_ioc_gc_range(filp, &range); 2844 } 2845 2846 static int f2fs_ioc_write_checkpoint(struct file *filp) 2847 { 2848 struct inode *inode = file_inode(filp); 2849 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2850 int ret; 2851 2852 if (!capable(CAP_SYS_ADMIN)) 2853 return -EPERM; 2854 2855 if (f2fs_readonly(sbi->sb)) 2856 return -EROFS; 2857 2858 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 2859 f2fs_info(sbi, "Skipping Checkpoint. 
/*
 * Defragment the part of @filp's data described by @range.
 *
 * The byte range is converted to a page range that is clamped to the end of
 * file.  A first pass over the block mappings decides whether the mapped
 * blocks are physically contiguous and counts them; if they are contiguous
 * nothing is moved.  Otherwise a second pass re-dirties the mapped pages in
 * batches of at most BLKS_PER_SEG() pages, with FI_SKIP_WRITES set while a
 * batch is being collected, and starts writeback after each batch.
 * FI_OPU_WRITE is set on the inode for the duration of the work.
 *
 * On success @range->len is overwritten with the number of bytes whose
 * pages were re-dirtied (0 when nothing had to be moved).
 *
 * Return: 0 on success; -EINVAL for compress-released or atomic files and
 * when in-place update would be used; -EAGAIN when there are not enough
 * free sections; otherwise the error of the failing helper.
 */
static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
					struct file *filp,
					struct f2fs_defragment *range)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_map_blocks map = { .m_next_extent = NULL,
					.m_seg_type = NO_CHECK_TYPE,
					.m_may_create = false };
	struct extent_info ei = {};
	pgoff_t pg_start, pg_end, next_pgofs;
	unsigned int total = 0, sec_num;
	block_t blk_end = 0;
	bool fragmented = false;
	int err;

	f2fs_balance_fs(sbi, true);

	inode_lock(inode);
	/* page range to work on, never past the last page of the file */
	pg_start = range->start >> PAGE_SHIFT;
	pg_end = min_t(pgoff_t,
				(range->start + range->len) >> PAGE_SHIFT,
				DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE));

	/* FI_OPU_WRITE is not set yet, so leave via unlock_out */
	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) ||
		f2fs_is_atomic_file(inode)) {
		err = -EINVAL;
		goto unlock_out;
	}

	/* if in-place-update policy is enabled, don't waste time here */
	set_inode_flag(inode, FI_OPU_WRITE);
	if (f2fs_should_update_inplace(inode, NULL)) {
		err = -EINVAL;
		goto out;
	}

	/* write back all dirty pages in the range */
	err = filemap_write_and_wait_range(inode->i_mapping,
						pg_start << PAGE_SHIFT,
						(pg_end << PAGE_SHIFT) - 1);
	if (err)
		goto out;

	/*
	 * Look up mapping info in the extent cache; skip defragmenting if one
	 * cached extent already covers the range up to pg_end.  err is 0
	 * here and total is still 0, so range->len is reported as 0.
	 */
	if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
		if ((pgoff_t)ei.fofs + ei.len >= pg_end)
			goto out;
	}

	map.m_lblk = pg_start;
	map.m_next_pgofs = &next_pgofs;

	/*
	 * Look up mapping info in the dnode page cache; skip defragmenting if
	 * all physical block addresses are contiguous even if there are
	 * hole(s) in the logical blocks.
	 */
	while (map.m_lblk < pg_end) {
		map.m_len = pg_end - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
		if (err)
			goto out;

		/* nothing mapped here: jump to the offset reported back */
		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
			map.m_lblk = next_pgofs;
			continue;
		}

		/* this extent does not start where the previous one ended */
		if (blk_end && blk_end != map.m_pblk)
			fragmented = true;

		/* record the total count of blocks that we're going to move */
		total += map.m_len;

		blk_end = map.m_pblk + map.m_len;

		map.m_lblk += map.m_len;
	}

	if (!fragmented) {
		total = 0;
		goto out;
	}

	sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi));

	/*
	 * Make sure there are enough free sections for LFS allocation; this
	 * avoids defragment running in SSR mode when free sections are being
	 * allocated intensively.
	 */
	if (has_not_enough_free_secs(sbi, 0, sec_num)) {
		err = -EAGAIN;
		goto out;
	}

	map.m_lblk = pg_start;
	map.m_len = pg_end - pg_start;
	/* from here on total counts the pages actually re-dirtied */
	total = 0;

	while (map.m_lblk < pg_end) {
		pgoff_t idx;
		/* pages collected in the current batch */
		int cnt = 0;

do_map:
		map.m_len = pg_end - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
		if (err)
			goto clear_out;

		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
			map.m_lblk = next_pgofs;
			goto check;
		}

		set_inode_flag(inode, FI_SKIP_WRITES);

		idx = map.m_lblk;
		while (idx < map.m_lblk + map.m_len &&
						cnt < BLKS_PER_SEG(sbi)) {
			struct folio *folio;

			folio = f2fs_get_lock_data_folio(inode, idx, true);
			if (IS_ERR(folio)) {
				err = PTR_ERR(folio);
				goto clear_out;
			}

			f2fs_folio_wait_writeback(folio, DATA, true, true);

			folio_mark_dirty(folio);
			folio_set_f2fs_gcing(folio);
			f2fs_folio_put(folio, true);

			idx++;
			cnt++;
			total++;
		}

		map.m_lblk = idx;
check:
		/* keep filling the batch until it is full or the range ends */
		if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi))
			goto do_map;

		clear_inode_flag(inode, FI_SKIP_WRITES);

		/* start writeback of the batch that was just dirtied */
		err = filemap_fdatawrite(inode->i_mapping);
		if (err)
			goto out;
	}
clear_out:
	clear_inode_flag(inode, FI_SKIP_WRITES);
out:
	clear_inode_flag(inode, FI_OPU_WRITE);
unlock_out:
	inode_unlock(inode);
	/* report the moved byte count only on success */
	if (!err)
		range->len = (u64)total << PAGE_SHIFT;
	return err;
}
/*
 * Move @len bytes of data blocks from @file_in at @pos_in to @file_out at
 * @pos_out via __exchange_data_block().
 *
 * Both files must be regular, unencrypted files on the same mount and
 * superblock, and neither may be compressed, pinned or atomic.  @pos_in,
 * the end of the source range and @pos_out must be F2FS_BLKSIZE aligned; a
 * source range that ends exactly at EOF is rounded up to the block
 * boundary first.  A @len of 0 means "from @pos_in to the end of the
 * source file".  Within one file, a destination that starts inside the
 * source range is rejected.
 *
 * The source inode lock is taken first; the destination inode lock and its
 * i_gc_rwsem[WRITE] are only try-locked, and -EBUSY is returned if either
 * attempt fails.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
			struct file *file_out, loff_t pos_out, size_t len)
{
	struct inode *src = file_inode(file_in);
	struct inode *dst = file_inode(file_out);
	struct f2fs_sb_info *sbi = F2FS_I_SB(src);
	size_t olen = len, dst_max_i_size = 0;
	size_t dst_osize;
	int ret;

	if (file_in->f_path.mnt != file_out->f_path.mnt ||
				src->i_sb != dst->i_sb)
		return -EXDEV;

	if (unlikely(f2fs_readonly(src->i_sb)))
		return -EROFS;

	if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
		return -EINVAL;

	if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst))
		return -EOPNOTSUPP;

	if (pos_out < 0 || pos_in < 0)
		return -EINVAL;

	if (src == dst) {
		/* moving a range onto itself is a no-op */
		if (pos_in == pos_out)
			return 0;
		/* destination starts inside the source range */
		if (pos_out > pos_in && pos_out < pos_in + len)
			return -EINVAL;
	}

	inode_lock(src);
	if (src != dst) {
		/* only try-lock the second inode; give up with -EBUSY */
		ret = -EBUSY;
		if (!inode_trylock(dst))
			goto out;
	}

	if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) ||
		f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* -EINVAL is the result of every range/alignment failure below */
	ret = -EINVAL;
	if (pos_in + len > src->i_size || pos_in + len < pos_in)
		goto out_unlock;
	/* len == 0: move everything from pos_in to the end of the source */
	if (len == 0)
		olen = len = src->i_size - pos_in;
	/* a range ending at EOF is extended to the block boundary */
	if (pos_in + len == src->i_size)
		len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in;
	if (len == 0) {
		ret = 0;
		goto out_unlock;
	}

	/*
	 * Remember the destination size.  olen is the length before the
	 * block rounding above; if the moved data reaches past the current
	 * end of the destination, that becomes its new size.
	 */
	dst_osize = dst->i_size;
	if (pos_out + olen > dst->i_size)
		dst_max_i_size = pos_out + olen;

	/* verify the end result is block aligned */
	if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) ||
			!IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) ||
			!IS_ALIGNED(pos_out, F2FS_BLKSIZE))
		goto out_unlock;

	ret = f2fs_convert_inline_inode(src);
	if (ret)
		goto out_unlock;

	ret = f2fs_convert_inline_inode(dst);
	if (ret)
		goto out_unlock;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(src->i_mapping,
					pos_in, pos_in + len);
	if (ret)
		goto out_unlock;

	ret = filemap_write_and_wait_range(dst->i_mapping,
					pos_out, pos_out + len);
	if (ret)
		goto out_unlock;

	f2fs_balance_fs(sbi, true);

	f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	if (src != dst) {
		/* as with the inode lock, only try-lock the destination */
		ret = -EBUSY;
		if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
			goto out_src;
	}

	f2fs_lock_op(sbi);
	ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in),
				F2FS_BYTES_TO_BLK(pos_out),
				F2FS_BYTES_TO_BLK(len), false);

	if (!ret) {
		/* grow to the new end, else restore the size saved above */
		if (dst_max_i_size)
			f2fs_i_size_write(dst, dst_max_i_size);
		else if (dst_osize != dst->i_size)
			f2fs_i_size_write(dst, dst_osize);
	}
	f2fs_unlock_op(sbi);

	if (src != dst)
		f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
out_src:
	f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	if (ret)
		goto out_unlock;

	/* success: update timestamps on both inodes and mark them dirty */
	inode_set_mtime_to_ts(src, inode_set_ctime_current(src));
	f2fs_mark_inode_dirty_sync(src, false);
	if (src != dst) {
		inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst));
		f2fs_mark_inode_dirty_sync(dst, false);
	}
	f2fs_update_time(sbi, REQ_TIME);

out_unlock:
	if (src != dst)
		inode_unlock(dst);
out:
	inode_unlock(src);
	return ret;
}
(!(fd_file(dst)->f_mode & FMODE_WRITE)) 3235 return -EBADF; 3236 3237 err = mnt_want_write_file(filp); 3238 if (err) 3239 return err; 3240 3241 err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst), 3242 range->pos_out, range->len); 3243 3244 mnt_drop_write_file(filp); 3245 return err; 3246 } 3247 3248 static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) 3249 { 3250 struct f2fs_move_range range; 3251 3252 if (copy_from_user(&range, (struct f2fs_move_range __user *)arg, 3253 sizeof(range))) 3254 return -EFAULT; 3255 return __f2fs_ioc_move_range(filp, &range); 3256 } 3257 3258 static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) 3259 { 3260 struct inode *inode = file_inode(filp); 3261 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3262 struct sit_info *sm = SIT_I(sbi); 3263 unsigned int start_segno = 0, end_segno = 0; 3264 unsigned int dev_start_segno = 0, dev_end_segno = 0; 3265 struct f2fs_flush_device range; 3266 struct f2fs_gc_control gc_control = { 3267 .init_gc_type = FG_GC, 3268 .should_migrate_blocks = true, 3269 .err_gc_skipped = true, 3270 .nr_free_secs = 0 }; 3271 int ret; 3272 3273 if (!capable(CAP_SYS_ADMIN)) 3274 return -EPERM; 3275 3276 if (f2fs_readonly(sbi->sb)) 3277 return -EROFS; 3278 3279 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) 3280 return -EINVAL; 3281 3282 if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, 3283 sizeof(range))) 3284 return -EFAULT; 3285 3286 if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || 3287 __is_large_section(sbi)) { 3288 f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1", 3289 range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi)); 3290 return -EINVAL; 3291 } 3292 3293 ret = mnt_want_write_file(filp); 3294 if (ret) 3295 return ret; 3296 3297 if (range.dev_num != 0) 3298 dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk); 3299 dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk); 3300 3301 start_segno = 
sm->last_victim[FLUSH_DEVICE]; 3302 if (start_segno < dev_start_segno || start_segno >= dev_end_segno) 3303 start_segno = dev_start_segno; 3304 end_segno = min(start_segno + range.segments, dev_end_segno); 3305 3306 while (start_segno < end_segno) { 3307 if (!f2fs_down_write_trylock(&sbi->gc_lock)) { 3308 ret = -EBUSY; 3309 goto out; 3310 } 3311 sm->last_victim[GC_CB] = end_segno + 1; 3312 sm->last_victim[GC_GREEDY] = end_segno + 1; 3313 sm->last_victim[ALLOC_NEXT] = end_segno + 1; 3314 3315 gc_control.victim_segno = start_segno; 3316 stat_inc_gc_call_count(sbi, FOREGROUND); 3317 ret = f2fs_gc(sbi, &gc_control); 3318 if (ret == -EAGAIN) 3319 ret = 0; 3320 else if (ret < 0) 3321 break; 3322 start_segno++; 3323 } 3324 out: 3325 mnt_drop_write_file(filp); 3326 return ret; 3327 } 3328 3329 static int f2fs_ioc_get_features(struct file *filp, unsigned long arg) 3330 { 3331 struct inode *inode = file_inode(filp); 3332 u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature); 3333 3334 /* Must validate to set it with SQLite behavior in Android. 
*/ 3335 sb_feature |= F2FS_FEATURE_ATOMIC_WRITE; 3336 3337 return put_user(sb_feature, (u32 __user *)arg); 3338 } 3339 3340 #ifdef CONFIG_QUOTA 3341 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) 3342 { 3343 struct dquot *transfer_to[MAXQUOTAS] = {}; 3344 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3345 struct super_block *sb = sbi->sb; 3346 int err; 3347 3348 transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); 3349 if (IS_ERR(transfer_to[PRJQUOTA])) 3350 return PTR_ERR(transfer_to[PRJQUOTA]); 3351 3352 err = __dquot_transfer(inode, transfer_to); 3353 if (err) 3354 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); 3355 dqput(transfer_to[PRJQUOTA]); 3356 return err; 3357 } 3358 3359 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) 3360 { 3361 struct f2fs_inode_info *fi = F2FS_I(inode); 3362 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3363 struct f2fs_inode *ri = NULL; 3364 kprojid_t kprojid; 3365 int err; 3366 3367 if (!f2fs_sb_has_project_quota(sbi)) { 3368 if (projid != F2FS_DEF_PROJID) 3369 return -EOPNOTSUPP; 3370 else 3371 return 0; 3372 } 3373 3374 if (!f2fs_has_extra_attr(inode)) 3375 return -EOPNOTSUPP; 3376 3377 kprojid = make_kprojid(&init_user_ns, (projid_t)projid); 3378 3379 if (projid_eq(kprojid, fi->i_projid)) 3380 return 0; 3381 3382 err = -EPERM; 3383 /* Is it quota file? 
Do not allow user to mess with it */ 3384 if (IS_NOQUOTA(inode)) 3385 return err; 3386 3387 if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) 3388 return -EOVERFLOW; 3389 3390 err = f2fs_dquot_initialize(inode); 3391 if (err) 3392 return err; 3393 3394 f2fs_lock_op(sbi); 3395 err = f2fs_transfer_project_quota(inode, kprojid); 3396 if (err) 3397 goto out_unlock; 3398 3399 fi->i_projid = kprojid; 3400 inode_set_ctime_current(inode); 3401 f2fs_mark_inode_dirty_sync(inode, true); 3402 out_unlock: 3403 f2fs_unlock_op(sbi); 3404 return err; 3405 } 3406 #else 3407 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) 3408 { 3409 return 0; 3410 } 3411 3412 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) 3413 { 3414 if (projid != F2FS_DEF_PROJID) 3415 return -EOPNOTSUPP; 3416 return 0; 3417 } 3418 #endif 3419 3420 int f2fs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) 3421 { 3422 struct inode *inode = d_inode(dentry); 3423 struct f2fs_inode_info *fi = F2FS_I(inode); 3424 u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags); 3425 3426 if (IS_ENCRYPTED(inode)) 3427 fsflags |= FS_ENCRYPT_FL; 3428 if (IS_VERITY(inode)) 3429 fsflags |= FS_VERITY_FL; 3430 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) 3431 fsflags |= FS_INLINE_DATA_FL; 3432 if (is_inode_flag_set(inode, FI_PIN_FILE)) 3433 fsflags |= FS_NOCOW_FL; 3434 3435 fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL); 3436 3437 if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) 3438 fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid); 3439 3440 return 0; 3441 } 3442 3443 int f2fs_fileattr_set(struct mnt_idmap *idmap, 3444 struct dentry *dentry, struct file_kattr *fa) 3445 { 3446 struct inode *inode = d_inode(dentry); 3447 u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL; 3448 u32 iflags; 3449 int err; 3450 3451 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 3452 return -EIO; 3453 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) 3454 return 
-ENOSPC; 3455 if (fsflags & ~F2FS_GETTABLE_FS_FL) 3456 return -EOPNOTSUPP; 3457 fsflags &= F2FS_SETTABLE_FS_FL; 3458 if (!fa->flags_valid) 3459 mask &= FS_COMMON_FL; 3460 3461 iflags = f2fs_fsflags_to_iflags(fsflags); 3462 if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) 3463 return -EOPNOTSUPP; 3464 3465 err = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(mask)); 3466 if (!err) 3467 err = f2fs_ioc_setproject(inode, fa->fsx_projid); 3468 3469 return err; 3470 } 3471 3472 int f2fs_pin_file_control(struct inode *inode, bool inc) 3473 { 3474 struct f2fs_inode_info *fi = F2FS_I(inode); 3475 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3476 3477 if (IS_DEVICE_ALIASING(inode)) 3478 return -EINVAL; 3479 3480 if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { 3481 f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", 3482 __func__, inode->i_ino, fi->i_gc_failures); 3483 clear_inode_flag(inode, FI_PIN_FILE); 3484 return -EAGAIN; 3485 } 3486 3487 /* Use i_gc_failures for normal file as a risk signal. 
*/ 3488 if (inc) 3489 f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); 3490 3491 return 0; 3492 } 3493 3494 static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) 3495 { 3496 struct inode *inode = file_inode(filp); 3497 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3498 __u32 pin; 3499 int ret = 0; 3500 3501 if (get_user(pin, (__u32 __user *)arg)) 3502 return -EFAULT; 3503 3504 if (!S_ISREG(inode->i_mode)) 3505 return -EINVAL; 3506 3507 if (f2fs_readonly(sbi->sb)) 3508 return -EROFS; 3509 3510 if (!pin && IS_DEVICE_ALIASING(inode)) 3511 return -EOPNOTSUPP; 3512 3513 ret = mnt_want_write_file(filp); 3514 if (ret) 3515 return ret; 3516 3517 inode_lock(inode); 3518 3519 if (f2fs_is_atomic_file(inode)) { 3520 ret = -EINVAL; 3521 goto out; 3522 } 3523 3524 if (!pin) { 3525 clear_inode_flag(inode, FI_PIN_FILE); 3526 f2fs_i_gc_failures_write(inode, 0); 3527 goto done; 3528 } else if (f2fs_is_pinned_file(inode)) { 3529 goto done; 3530 } 3531 3532 if (F2FS_HAS_BLOCKS(inode)) { 3533 ret = -EFBIG; 3534 goto out; 3535 } 3536 3537 /* Let's allow file pinning on zoned device. 
*/ 3538 if (!f2fs_sb_has_blkzoned(sbi) && 3539 f2fs_should_update_outplace(inode, NULL)) { 3540 ret = -EINVAL; 3541 goto out; 3542 } 3543 3544 if (f2fs_pin_file_control(inode, false)) { 3545 ret = -EAGAIN; 3546 goto out; 3547 } 3548 3549 ret = f2fs_convert_inline_inode(inode); 3550 if (ret) 3551 goto out; 3552 3553 if (!f2fs_disable_compressed_file(inode)) { 3554 ret = -EOPNOTSUPP; 3555 goto out; 3556 } 3557 3558 set_inode_flag(inode, FI_PIN_FILE); 3559 ret = F2FS_I(inode)->i_gc_failures; 3560 done: 3561 f2fs_update_time(sbi, REQ_TIME); 3562 out: 3563 inode_unlock(inode); 3564 mnt_drop_write_file(filp); 3565 return ret; 3566 } 3567 3568 static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) 3569 { 3570 struct inode *inode = file_inode(filp); 3571 __u32 pin = 0; 3572 3573 if (is_inode_flag_set(inode, FI_PIN_FILE)) 3574 pin = F2FS_I(inode)->i_gc_failures; 3575 return put_user(pin, (u32 __user *)arg); 3576 } 3577 3578 static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg) 3579 { 3580 return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 
1 : 0, 3581 (u32 __user *)arg); 3582 } 3583 3584 static int f2fs_ioc_io_prio(struct file *filp, unsigned long arg) 3585 { 3586 struct inode *inode = file_inode(filp); 3587 __u32 level; 3588 3589 if (get_user(level, (__u32 __user *)arg)) 3590 return -EFAULT; 3591 3592 if (!S_ISREG(inode->i_mode) || level >= F2FS_IOPRIO_MAX) 3593 return -EINVAL; 3594 3595 inode_lock(inode); 3596 F2FS_I(inode)->ioprio_hint = level; 3597 inode_unlock(inode); 3598 return 0; 3599 } 3600 3601 int f2fs_precache_extents(struct inode *inode) 3602 { 3603 struct f2fs_inode_info *fi = F2FS_I(inode); 3604 struct f2fs_map_blocks map; 3605 pgoff_t m_next_extent; 3606 loff_t end; 3607 int err; 3608 3609 if (is_inode_flag_set(inode, FI_NO_EXTENT)) 3610 return -EOPNOTSUPP; 3611 3612 map.m_lblk = 0; 3613 map.m_pblk = 0; 3614 map.m_next_pgofs = NULL; 3615 map.m_next_extent = &m_next_extent; 3616 map.m_seg_type = NO_CHECK_TYPE; 3617 map.m_may_create = false; 3618 end = F2FS_BLK_ALIGN(i_size_read(inode)); 3619 3620 while (map.m_lblk < end) { 3621 map.m_len = end - map.m_lblk; 3622 3623 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 3624 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE); 3625 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 3626 if (err || !map.m_len) 3627 return err; 3628 3629 map.m_lblk = m_next_extent; 3630 } 3631 3632 return 0; 3633 } 3634 3635 static int f2fs_ioc_precache_extents(struct file *filp) 3636 { 3637 return f2fs_precache_extents(file_inode(filp)); 3638 } 3639 3640 static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) 3641 { 3642 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); 3643 __u64 block_count; 3644 3645 if (!capable(CAP_SYS_ADMIN)) 3646 return -EPERM; 3647 3648 if (f2fs_readonly(sbi->sb)) 3649 return -EROFS; 3650 3651 if (copy_from_user(&block_count, (void __user *)arg, 3652 sizeof(block_count))) 3653 return -EFAULT; 3654 3655 return f2fs_resize_fs(filp, block_count); 3656 } 3657 3658 static int f2fs_ioc_enable_verity(struct file *filp, unsigned long 
arg) 3659 { 3660 struct inode *inode = file_inode(filp); 3661 3662 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 3663 3664 if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) { 3665 f2fs_warn(F2FS_I_SB(inode), 3666 "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem", 3667 inode->i_ino); 3668 return -EOPNOTSUPP; 3669 } 3670 3671 return fsverity_ioctl_enable(filp, (const void __user *)arg); 3672 } 3673 3674 static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg) 3675 { 3676 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) 3677 return -EOPNOTSUPP; 3678 3679 return fsverity_ioctl_measure(filp, (void __user *)arg); 3680 } 3681 3682 static int f2fs_ioc_read_verity_metadata(struct file *filp, unsigned long arg) 3683 { 3684 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) 3685 return -EOPNOTSUPP; 3686 3687 return fsverity_ioctl_read_metadata(filp, (const void __user *)arg); 3688 } 3689 3690 static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg) 3691 { 3692 struct inode *inode = file_inode(filp); 3693 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3694 char *vbuf; 3695 int count; 3696 int err = 0; 3697 3698 vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL); 3699 if (!vbuf) 3700 return -ENOMEM; 3701 3702 f2fs_down_read(&sbi->sb_lock); 3703 count = utf16s_to_utf8s(sbi->raw_super->volume_name, 3704 ARRAY_SIZE(sbi->raw_super->volume_name), 3705 UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME); 3706 f2fs_up_read(&sbi->sb_lock); 3707 3708 if (copy_to_user((char __user *)arg, vbuf, 3709 min(FSLABEL_MAX, count))) 3710 err = -EFAULT; 3711 3712 kfree(vbuf); 3713 return err; 3714 } 3715 3716 static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) 3717 { 3718 struct inode *inode = file_inode(filp); 3719 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3720 char *vbuf; 3721 int err = 0; 3722 3723 if (!capable(CAP_SYS_ADMIN)) 3724 return -EPERM; 3725 3726 vbuf = strndup_user((const char __user *)arg, 
FSLABEL_MAX); 3727 if (IS_ERR(vbuf)) 3728 return PTR_ERR(vbuf); 3729 3730 err = mnt_want_write_file(filp); 3731 if (err) 3732 goto out; 3733 3734 f2fs_down_write(&sbi->sb_lock); 3735 3736 memset(sbi->raw_super->volume_name, 0, 3737 sizeof(sbi->raw_super->volume_name)); 3738 utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN, 3739 sbi->raw_super->volume_name, 3740 ARRAY_SIZE(sbi->raw_super->volume_name)); 3741 3742 err = f2fs_commit_super(sbi, false); 3743 3744 f2fs_up_write(&sbi->sb_lock); 3745 3746 mnt_drop_write_file(filp); 3747 out: 3748 kfree(vbuf); 3749 return err; 3750 } 3751 3752 static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks) 3753 { 3754 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 3755 return -EOPNOTSUPP; 3756 3757 if (!f2fs_compressed_file(inode)) 3758 return -EINVAL; 3759 3760 *blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks); 3761 3762 return 0; 3763 } 3764 3765 static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg) 3766 { 3767 struct inode *inode = file_inode(filp); 3768 __u64 blocks; 3769 int ret; 3770 3771 ret = f2fs_get_compress_blocks(inode, &blocks); 3772 if (ret < 0) 3773 return ret; 3774 3775 return put_user(blocks, (u64 __user *)arg); 3776 } 3777 3778 static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) 3779 { 3780 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 3781 unsigned int released_blocks = 0; 3782 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 3783 block_t blkaddr; 3784 int i; 3785 3786 for (i = 0; i < count; i++) { 3787 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3788 dn->ofs_in_node + i); 3789 3790 if (!__is_valid_data_blkaddr(blkaddr)) 3791 continue; 3792 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, 3793 DATA_GENERIC_ENHANCE))) 3794 return -EFSCORRUPTED; 3795 } 3796 3797 while (count) { 3798 int compr_blocks = 0; 3799 3800 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { 3801 blkaddr = f2fs_data_blkaddr(dn); 3802 3803 if (i == 
0) { 3804 if (blkaddr == COMPRESS_ADDR) 3805 continue; 3806 dn->ofs_in_node += cluster_size; 3807 goto next; 3808 } 3809 3810 if (__is_valid_data_blkaddr(blkaddr)) 3811 compr_blocks++; 3812 3813 if (blkaddr != NEW_ADDR) 3814 continue; 3815 3816 f2fs_set_data_blkaddr(dn, NULL_ADDR); 3817 } 3818 3819 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false); 3820 dec_valid_block_count(sbi, dn->inode, 3821 cluster_size - compr_blocks); 3822 3823 released_blocks += cluster_size - compr_blocks; 3824 next: 3825 count -= cluster_size; 3826 } 3827 3828 return released_blocks; 3829 } 3830 3831 static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) 3832 { 3833 struct inode *inode = file_inode(filp); 3834 struct f2fs_inode_info *fi = F2FS_I(inode); 3835 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3836 pgoff_t page_idx = 0, last_idx; 3837 unsigned int released_blocks = 0; 3838 int ret; 3839 int writecount; 3840 3841 if (!f2fs_sb_has_compression(sbi)) 3842 return -EOPNOTSUPP; 3843 3844 if (f2fs_readonly(sbi->sb)) 3845 return -EROFS; 3846 3847 ret = mnt_want_write_file(filp); 3848 if (ret) 3849 return ret; 3850 3851 f2fs_balance_fs(sbi, true); 3852 3853 inode_lock(inode); 3854 3855 writecount = atomic_read(&inode->i_writecount); 3856 if ((filp->f_mode & FMODE_WRITE && writecount != 1) || 3857 (!(filp->f_mode & FMODE_WRITE) && writecount)) { 3858 ret = -EBUSY; 3859 goto out; 3860 } 3861 3862 if (!f2fs_compressed_file(inode) || 3863 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 3864 ret = -EINVAL; 3865 goto out; 3866 } 3867 3868 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 3869 if (ret) 3870 goto out; 3871 3872 if (!atomic_read(&fi->i_compr_blocks)) { 3873 ret = -EPERM; 3874 goto out; 3875 } 3876 3877 set_inode_flag(inode, FI_COMPRESS_RELEASED); 3878 inode_set_ctime_current(inode); 3879 f2fs_mark_inode_dirty_sync(inode, true); 3880 3881 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 3882 filemap_invalidate_lock(inode->i_mapping); 3883 
3884 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 3885 3886 while (page_idx < last_idx) { 3887 struct dnode_of_data dn; 3888 pgoff_t end_offset, count; 3889 3890 f2fs_lock_op(sbi); 3891 3892 set_new_dnode(&dn, inode, NULL, NULL, 0); 3893 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); 3894 if (ret) { 3895 f2fs_unlock_op(sbi); 3896 if (ret == -ENOENT) { 3897 page_idx = f2fs_get_next_page_offset(&dn, 3898 page_idx); 3899 ret = 0; 3900 continue; 3901 } 3902 break; 3903 } 3904 3905 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 3906 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); 3907 count = round_up(count, fi->i_cluster_size); 3908 3909 ret = release_compress_blocks(&dn, count); 3910 3911 f2fs_put_dnode(&dn); 3912 3913 f2fs_unlock_op(sbi); 3914 3915 if (ret < 0) 3916 break; 3917 3918 page_idx += count; 3919 released_blocks += ret; 3920 } 3921 3922 filemap_invalidate_unlock(inode->i_mapping); 3923 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 3924 out: 3925 if (released_blocks) 3926 f2fs_update_time(sbi, REQ_TIME); 3927 inode_unlock(inode); 3928 3929 mnt_drop_write_file(filp); 3930 3931 if (ret >= 0) { 3932 ret = put_user(released_blocks, (u64 __user *)arg); 3933 } else if (released_blocks && 3934 atomic_read(&fi->i_compr_blocks)) { 3935 set_sbi_flag(sbi, SBI_NEED_FSCK); 3936 f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " 3937 "iblocks=%llu, released=%u, compr_blocks=%u, " 3938 "run fsck to fix.", 3939 __func__, inode->i_ino, inode->i_blocks, 3940 released_blocks, 3941 atomic_read(&fi->i_compr_blocks)); 3942 } 3943 3944 return ret; 3945 } 3946 3947 static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, 3948 unsigned int *reserved_blocks) 3949 { 3950 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 3951 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 3952 block_t blkaddr; 3953 int i; 3954 3955 for (i = 0; i < count; i++) { 3956 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3957 
dn->ofs_in_node + i); 3958 3959 if (!__is_valid_data_blkaddr(blkaddr)) 3960 continue; 3961 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, 3962 DATA_GENERIC_ENHANCE))) 3963 return -EFSCORRUPTED; 3964 } 3965 3966 while (count) { 3967 int compr_blocks = 0; 3968 blkcnt_t reserved = 0; 3969 blkcnt_t to_reserved; 3970 int ret; 3971 3972 for (i = 0; i < cluster_size; i++) { 3973 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3974 dn->ofs_in_node + i); 3975 3976 if (i == 0) { 3977 if (blkaddr != COMPRESS_ADDR) { 3978 dn->ofs_in_node += cluster_size; 3979 goto next; 3980 } 3981 continue; 3982 } 3983 3984 /* 3985 * compressed cluster was not released due to it 3986 * fails in release_compress_blocks(), so NEW_ADDR 3987 * is a possible case. 3988 */ 3989 if (blkaddr == NEW_ADDR) { 3990 reserved++; 3991 continue; 3992 } 3993 if (__is_valid_data_blkaddr(blkaddr)) { 3994 compr_blocks++; 3995 continue; 3996 } 3997 } 3998 3999 to_reserved = cluster_size - compr_blocks - reserved; 4000 4001 /* for the case all blocks in cluster were reserved */ 4002 if (reserved && to_reserved == 1) { 4003 dn->ofs_in_node += cluster_size; 4004 goto next; 4005 } 4006 4007 ret = inc_valid_block_count(sbi, dn->inode, 4008 &to_reserved, false); 4009 if (unlikely(ret)) 4010 return ret; 4011 4012 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { 4013 if (f2fs_data_blkaddr(dn) == NULL_ADDR) 4014 f2fs_set_data_blkaddr(dn, NEW_ADDR); 4015 } 4016 4017 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); 4018 4019 *reserved_blocks += to_reserved; 4020 next: 4021 count -= cluster_size; 4022 } 4023 4024 return 0; 4025 } 4026 4027 static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) 4028 { 4029 struct inode *inode = file_inode(filp); 4030 struct f2fs_inode_info *fi = F2FS_I(inode); 4031 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4032 pgoff_t page_idx = 0, last_idx; 4033 unsigned int reserved_blocks = 0; 4034 int ret; 4035 4036 if (!f2fs_sb_has_compression(sbi)) 4037 
return -EOPNOTSUPP; 4038 4039 if (f2fs_readonly(sbi->sb)) 4040 return -EROFS; 4041 4042 ret = mnt_want_write_file(filp); 4043 if (ret) 4044 return ret; 4045 4046 f2fs_balance_fs(sbi, true); 4047 4048 inode_lock(inode); 4049 4050 if (!f2fs_compressed_file(inode) || 4051 !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4052 ret = -EINVAL; 4053 goto unlock_inode; 4054 } 4055 4056 if (atomic_read(&fi->i_compr_blocks)) 4057 goto unlock_inode; 4058 4059 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 4060 filemap_invalidate_lock(inode->i_mapping); 4061 4062 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4063 4064 while (page_idx < last_idx) { 4065 struct dnode_of_data dn; 4066 pgoff_t end_offset, count; 4067 4068 f2fs_lock_op(sbi); 4069 4070 set_new_dnode(&dn, inode, NULL, NULL, 0); 4071 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); 4072 if (ret) { 4073 f2fs_unlock_op(sbi); 4074 if (ret == -ENOENT) { 4075 page_idx = f2fs_get_next_page_offset(&dn, 4076 page_idx); 4077 ret = 0; 4078 continue; 4079 } 4080 break; 4081 } 4082 4083 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 4084 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); 4085 count = round_up(count, fi->i_cluster_size); 4086 4087 ret = reserve_compress_blocks(&dn, count, &reserved_blocks); 4088 4089 f2fs_put_dnode(&dn); 4090 4091 f2fs_unlock_op(sbi); 4092 4093 if (ret < 0) 4094 break; 4095 4096 page_idx += count; 4097 } 4098 4099 filemap_invalidate_unlock(inode->i_mapping); 4100 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 4101 4102 if (!ret) { 4103 clear_inode_flag(inode, FI_COMPRESS_RELEASED); 4104 inode_set_ctime_current(inode); 4105 f2fs_mark_inode_dirty_sync(inode, true); 4106 } 4107 unlock_inode: 4108 if (reserved_blocks) 4109 f2fs_update_time(sbi, REQ_TIME); 4110 inode_unlock(inode); 4111 mnt_drop_write_file(filp); 4112 4113 if (!ret) { 4114 ret = put_user(reserved_blocks, (u64 __user *)arg); 4115 } else if (reserved_blocks && 4116 atomic_read(&fi->i_compr_blocks)) { 4117 
set_sbi_flag(sbi, SBI_NEED_FSCK); 4118 f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx " 4119 "iblocks=%llu, reserved=%u, compr_blocks=%u, " 4120 "run fsck to fix.", 4121 __func__, inode->i_ino, inode->i_blocks, 4122 reserved_blocks, 4123 atomic_read(&fi->i_compr_blocks)); 4124 } 4125 4126 return ret; 4127 } 4128 4129 static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode, 4130 pgoff_t off, block_t block, block_t len, u32 flags) 4131 { 4132 sector_t sector = SECTOR_FROM_BLOCK(block); 4133 sector_t nr_sects = SECTOR_FROM_BLOCK(len); 4134 int ret = 0; 4135 4136 if (flags & F2FS_TRIM_FILE_DISCARD) { 4137 if (bdev_max_secure_erase_sectors(bdev)) 4138 ret = blkdev_issue_secure_erase(bdev, sector, nr_sects, 4139 GFP_NOFS); 4140 else 4141 ret = blkdev_issue_discard(bdev, sector, nr_sects, 4142 GFP_NOFS); 4143 } 4144 4145 if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) { 4146 if (IS_ENCRYPTED(inode)) 4147 ret = fscrypt_zeroout_range(inode, off, block, len); 4148 else 4149 ret = blkdev_issue_zeroout(bdev, sector, nr_sects, 4150 GFP_NOFS, 0); 4151 } 4152 4153 return ret; 4154 } 4155 4156 static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) 4157 { 4158 struct inode *inode = file_inode(filp); 4159 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4160 struct address_space *mapping = inode->i_mapping; 4161 struct block_device *prev_bdev = NULL; 4162 struct f2fs_sectrim_range range; 4163 pgoff_t index, pg_end, prev_index = 0; 4164 block_t prev_block = 0, len = 0; 4165 loff_t end_addr; 4166 bool to_end = false; 4167 int ret = 0; 4168 4169 if (!(filp->f_mode & FMODE_WRITE)) 4170 return -EBADF; 4171 4172 if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg, 4173 sizeof(range))) 4174 return -EFAULT; 4175 4176 if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) || 4177 !S_ISREG(inode->i_mode)) 4178 return -EINVAL; 4179 4180 if (((range.flags & F2FS_TRIM_FILE_DISCARD) && 4181 !f2fs_hw_support_discard(sbi)) || 4182 
((range.flags & F2FS_TRIM_FILE_ZEROOUT) && 4183 IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi))) 4184 return -EOPNOTSUPP; 4185 4186 ret = mnt_want_write_file(filp); 4187 if (ret) 4188 return ret; 4189 inode_lock(inode); 4190 4191 if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) || 4192 range.start >= inode->i_size) { 4193 ret = -EINVAL; 4194 goto err; 4195 } 4196 4197 if (range.len == 0) 4198 goto err; 4199 4200 if (inode->i_size - range.start > range.len) { 4201 end_addr = range.start + range.len; 4202 } else { 4203 end_addr = range.len == (u64)-1 ? 4204 sbi->sb->s_maxbytes : inode->i_size; 4205 to_end = true; 4206 } 4207 4208 if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) || 4209 (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) { 4210 ret = -EINVAL; 4211 goto err; 4212 } 4213 4214 index = F2FS_BYTES_TO_BLK(range.start); 4215 pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE); 4216 4217 ret = f2fs_convert_inline_inode(inode); 4218 if (ret) 4219 goto err; 4220 4221 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 4222 filemap_invalidate_lock(mapping); 4223 4224 ret = filemap_write_and_wait_range(mapping, range.start, 4225 to_end ? LLONG_MAX : end_addr - 1); 4226 if (ret) 4227 goto out; 4228 4229 truncate_inode_pages_range(mapping, range.start, 4230 to_end ? 
-1 : end_addr - 1); 4231 4232 while (index < pg_end) { 4233 struct dnode_of_data dn; 4234 pgoff_t end_offset, count; 4235 int i; 4236 4237 set_new_dnode(&dn, inode, NULL, NULL, 0); 4238 ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); 4239 if (ret) { 4240 if (ret == -ENOENT) { 4241 index = f2fs_get_next_page_offset(&dn, index); 4242 continue; 4243 } 4244 goto out; 4245 } 4246 4247 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 4248 count = min(end_offset - dn.ofs_in_node, pg_end - index); 4249 for (i = 0; i < count; i++, index++, dn.ofs_in_node++) { 4250 struct block_device *cur_bdev; 4251 block_t blkaddr = f2fs_data_blkaddr(&dn); 4252 4253 if (!__is_valid_data_blkaddr(blkaddr)) 4254 continue; 4255 4256 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, 4257 DATA_GENERIC_ENHANCE)) { 4258 ret = -EFSCORRUPTED; 4259 f2fs_put_dnode(&dn); 4260 goto out; 4261 } 4262 4263 cur_bdev = f2fs_target_device(sbi, blkaddr, NULL); 4264 if (f2fs_is_multi_device(sbi)) { 4265 int di = f2fs_target_device_index(sbi, blkaddr); 4266 4267 blkaddr -= FDEV(di).start_blk; 4268 } 4269 4270 if (len) { 4271 if (prev_bdev == cur_bdev && 4272 index == prev_index + len && 4273 blkaddr == prev_block + len) { 4274 len++; 4275 } else { 4276 ret = f2fs_secure_erase(prev_bdev, 4277 inode, prev_index, prev_block, 4278 len, range.flags); 4279 if (ret) { 4280 f2fs_put_dnode(&dn); 4281 goto out; 4282 } 4283 4284 len = 0; 4285 } 4286 } 4287 4288 if (!len) { 4289 prev_bdev = cur_bdev; 4290 prev_index = index; 4291 prev_block = blkaddr; 4292 len = 1; 4293 } 4294 } 4295 4296 f2fs_put_dnode(&dn); 4297 4298 if (fatal_signal_pending(current)) { 4299 ret = -EINTR; 4300 goto out; 4301 } 4302 cond_resched(); 4303 } 4304 4305 if (len) 4306 ret = f2fs_secure_erase(prev_bdev, inode, prev_index, 4307 prev_block, len, range.flags); 4308 f2fs_update_time(sbi, REQ_TIME); 4309 out: 4310 filemap_invalidate_unlock(mapping); 4311 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 4312 err: 4313 inode_unlock(inode); 4314 
mnt_drop_write_file(filp); 4315 4316 return ret; 4317 } 4318 4319 static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg) 4320 { 4321 struct inode *inode = file_inode(filp); 4322 struct f2fs_comp_option option; 4323 4324 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 4325 return -EOPNOTSUPP; 4326 4327 inode_lock_shared(inode); 4328 4329 if (!f2fs_compressed_file(inode)) { 4330 inode_unlock_shared(inode); 4331 return -ENODATA; 4332 } 4333 4334 option.algorithm = F2FS_I(inode)->i_compress_algorithm; 4335 option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size; 4336 4337 inode_unlock_shared(inode); 4338 4339 if (copy_to_user((struct f2fs_comp_option __user *)arg, &option, 4340 sizeof(option))) 4341 return -EFAULT; 4342 4343 return 0; 4344 } 4345 4346 static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) 4347 { 4348 struct inode *inode = file_inode(filp); 4349 struct f2fs_inode_info *fi = F2FS_I(inode); 4350 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4351 struct f2fs_comp_option option; 4352 int ret = 0; 4353 4354 if (!f2fs_sb_has_compression(sbi)) 4355 return -EOPNOTSUPP; 4356 4357 if (!(filp->f_mode & FMODE_WRITE)) 4358 return -EBADF; 4359 4360 if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg, 4361 sizeof(option))) 4362 return -EFAULT; 4363 4364 if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || 4365 option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || 4366 option.algorithm >= COMPRESS_MAX) 4367 return -EINVAL; 4368 4369 ret = mnt_want_write_file(filp); 4370 if (ret) 4371 return ret; 4372 inode_lock(inode); 4373 4374 f2fs_down_write(&F2FS_I(inode)->i_sem); 4375 if (!f2fs_compressed_file(inode)) { 4376 ret = -EINVAL; 4377 goto out; 4378 } 4379 4380 if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { 4381 ret = -EBUSY; 4382 goto out; 4383 } 4384 4385 if (F2FS_HAS_BLOCKS(inode)) { 4386 ret = -EFBIG; 4387 goto out; 4388 } 4389 4390 fi->i_compress_algorithm = option.algorithm; 4391 
fi->i_log_cluster_size = option.log_cluster_size; 4392 fi->i_cluster_size = BIT(option.log_cluster_size); 4393 /* Set default level */ 4394 if (fi->i_compress_algorithm == COMPRESS_ZSTD) 4395 fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; 4396 else 4397 fi->i_compress_level = 0; 4398 /* Adjust mount option level */ 4399 if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm && 4400 F2FS_OPTION(sbi).compress_level) 4401 fi->i_compress_level = F2FS_OPTION(sbi).compress_level; 4402 f2fs_mark_inode_dirty_sync(inode, true); 4403 4404 if (!f2fs_is_compress_backend_ready(inode)) 4405 f2fs_warn(sbi, "compression algorithm is successfully set, " 4406 "but current kernel doesn't support this algorithm."); 4407 out: 4408 f2fs_up_write(&fi->i_sem); 4409 inode_unlock(inode); 4410 mnt_drop_write_file(filp); 4411 4412 return ret; 4413 } 4414 4415 static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) 4416 { 4417 DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx); 4418 struct address_space *mapping = inode->i_mapping; 4419 struct folio *folio; 4420 pgoff_t redirty_idx = page_idx; 4421 int page_len = 0, ret = 0; 4422 4423 filemap_invalidate_lock_shared(mapping); 4424 page_cache_ra_unbounded(&ractl, len, 0); 4425 filemap_invalidate_unlock_shared(mapping); 4426 4427 do { 4428 folio = read_cache_folio(mapping, page_idx, NULL, NULL); 4429 if (IS_ERR(folio)) { 4430 ret = PTR_ERR(folio); 4431 break; 4432 } 4433 page_len += folio_nr_pages(folio) - (page_idx - folio->index); 4434 page_idx = folio_next_index(folio); 4435 } while (page_len < len); 4436 4437 do { 4438 folio = filemap_lock_folio(mapping, redirty_idx); 4439 4440 /* It will never fail, when folio has pinned above */ 4441 f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(folio)); 4442 4443 f2fs_folio_wait_writeback(folio, DATA, true, true); 4444 4445 folio_mark_dirty(folio); 4446 folio_set_f2fs_gcing(folio); 4447 redirty_idx = folio_next_index(folio); 4448 folio_unlock(folio); 4449 folio_put_refs(folio, 
2); 4450 } while (redirty_idx < page_idx); 4451 4452 return ret; 4453 } 4454 4455 static int f2fs_ioc_decompress_file(struct file *filp) 4456 { 4457 struct inode *inode = file_inode(filp); 4458 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4459 struct f2fs_inode_info *fi = F2FS_I(inode); 4460 pgoff_t page_idx = 0, last_idx, cluster_idx; 4461 int ret; 4462 4463 if (!f2fs_sb_has_compression(sbi) || 4464 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) 4465 return -EOPNOTSUPP; 4466 4467 if (!(filp->f_mode & FMODE_WRITE)) 4468 return -EBADF; 4469 4470 f2fs_balance_fs(sbi, true); 4471 4472 ret = mnt_want_write_file(filp); 4473 if (ret) 4474 return ret; 4475 inode_lock(inode); 4476 4477 if (!f2fs_is_compress_backend_ready(inode)) { 4478 ret = -EOPNOTSUPP; 4479 goto out; 4480 } 4481 4482 if (!f2fs_compressed_file(inode) || 4483 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4484 ret = -EINVAL; 4485 goto out; 4486 } 4487 4488 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 4489 if (ret) 4490 goto out; 4491 4492 if (!atomic_read(&fi->i_compr_blocks)) 4493 goto out; 4494 4495 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4496 last_idx >>= fi->i_log_cluster_size; 4497 4498 for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { 4499 page_idx = cluster_idx << fi->i_log_cluster_size; 4500 4501 if (!f2fs_is_compressed_cluster(inode, page_idx)) 4502 continue; 4503 4504 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); 4505 if (ret < 0) 4506 break; 4507 4508 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { 4509 ret = filemap_fdatawrite(inode->i_mapping); 4510 if (ret < 0) 4511 break; 4512 } 4513 4514 cond_resched(); 4515 if (fatal_signal_pending(current)) { 4516 ret = -EINTR; 4517 break; 4518 } 4519 } 4520 4521 if (!ret) 4522 ret = filemap_write_and_wait_range(inode->i_mapping, 0, 4523 LLONG_MAX); 4524 4525 if (ret) 4526 f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). 
Please delete the file.", 4527 __func__, ret); 4528 f2fs_update_time(sbi, REQ_TIME); 4529 out: 4530 inode_unlock(inode); 4531 mnt_drop_write_file(filp); 4532 4533 return ret; 4534 } 4535 4536 static int f2fs_ioc_compress_file(struct file *filp) 4537 { 4538 struct inode *inode = file_inode(filp); 4539 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4540 struct f2fs_inode_info *fi = F2FS_I(inode); 4541 pgoff_t page_idx = 0, last_idx, cluster_idx; 4542 int ret; 4543 4544 if (!f2fs_sb_has_compression(sbi) || 4545 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) 4546 return -EOPNOTSUPP; 4547 4548 if (!(filp->f_mode & FMODE_WRITE)) 4549 return -EBADF; 4550 4551 f2fs_balance_fs(sbi, true); 4552 4553 ret = mnt_want_write_file(filp); 4554 if (ret) 4555 return ret; 4556 inode_lock(inode); 4557 4558 if (!f2fs_is_compress_backend_ready(inode)) { 4559 ret = -EOPNOTSUPP; 4560 goto out; 4561 } 4562 4563 if (!f2fs_compressed_file(inode) || 4564 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4565 ret = -EINVAL; 4566 goto out; 4567 } 4568 4569 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 4570 if (ret) 4571 goto out; 4572 4573 set_inode_flag(inode, FI_ENABLE_COMPRESS); 4574 4575 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4576 last_idx >>= fi->i_log_cluster_size; 4577 4578 for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { 4579 page_idx = cluster_idx << fi->i_log_cluster_size; 4580 4581 if (f2fs_is_sparse_cluster(inode, page_idx)) 4582 continue; 4583 4584 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); 4585 if (ret < 0) 4586 break; 4587 4588 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { 4589 ret = filemap_fdatawrite(inode->i_mapping); 4590 if (ret < 0) 4591 break; 4592 } 4593 4594 cond_resched(); 4595 if (fatal_signal_pending(current)) { 4596 ret = -EINTR; 4597 break; 4598 } 4599 } 4600 4601 if (!ret) 4602 ret = filemap_write_and_wait_range(inode->i_mapping, 0, 4603 LLONG_MAX); 4604 4605 clear_inode_flag(inode, 
FI_ENABLE_COMPRESS); 4606 4607 if (ret) 4608 f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.", 4609 __func__, ret); 4610 f2fs_update_time(sbi, REQ_TIME); 4611 out: 4612 inode_unlock(inode); 4613 mnt_drop_write_file(filp); 4614 4615 return ret; 4616 } 4617 4618 static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 4619 { 4620 switch (cmd) { 4621 case FS_IOC_GETVERSION: 4622 return f2fs_ioc_getversion(filp, arg); 4623 case F2FS_IOC_START_ATOMIC_WRITE: 4624 return f2fs_ioc_start_atomic_write(filp, false); 4625 case F2FS_IOC_START_ATOMIC_REPLACE: 4626 return f2fs_ioc_start_atomic_write(filp, true); 4627 case F2FS_IOC_COMMIT_ATOMIC_WRITE: 4628 return f2fs_ioc_commit_atomic_write(filp); 4629 case F2FS_IOC_ABORT_ATOMIC_WRITE: 4630 return f2fs_ioc_abort_atomic_write(filp); 4631 case F2FS_IOC_START_VOLATILE_WRITE: 4632 case F2FS_IOC_RELEASE_VOLATILE_WRITE: 4633 return -EOPNOTSUPP; 4634 case F2FS_IOC_SHUTDOWN: 4635 return f2fs_ioc_shutdown(filp, arg); 4636 case FITRIM: 4637 return f2fs_ioc_fitrim(filp, arg); 4638 case FS_IOC_SET_ENCRYPTION_POLICY: 4639 return f2fs_ioc_set_encryption_policy(filp, arg); 4640 case FS_IOC_GET_ENCRYPTION_POLICY: 4641 return f2fs_ioc_get_encryption_policy(filp, arg); 4642 case FS_IOC_GET_ENCRYPTION_PWSALT: 4643 return f2fs_ioc_get_encryption_pwsalt(filp, arg); 4644 case FS_IOC_GET_ENCRYPTION_POLICY_EX: 4645 return f2fs_ioc_get_encryption_policy_ex(filp, arg); 4646 case FS_IOC_ADD_ENCRYPTION_KEY: 4647 return f2fs_ioc_add_encryption_key(filp, arg); 4648 case FS_IOC_REMOVE_ENCRYPTION_KEY: 4649 return f2fs_ioc_remove_encryption_key(filp, arg); 4650 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: 4651 return f2fs_ioc_remove_encryption_key_all_users(filp, arg); 4652 case FS_IOC_GET_ENCRYPTION_KEY_STATUS: 4653 return f2fs_ioc_get_encryption_key_status(filp, arg); 4654 case FS_IOC_GET_ENCRYPTION_NONCE: 4655 return f2fs_ioc_get_encryption_nonce(filp, arg); 4656 case 
F2FS_IOC_GARBAGE_COLLECT: 4657 return f2fs_ioc_gc(filp, arg); 4658 case F2FS_IOC_GARBAGE_COLLECT_RANGE: 4659 return f2fs_ioc_gc_range(filp, arg); 4660 case F2FS_IOC_WRITE_CHECKPOINT: 4661 return f2fs_ioc_write_checkpoint(filp); 4662 case F2FS_IOC_DEFRAGMENT: 4663 return f2fs_ioc_defragment(filp, arg); 4664 case F2FS_IOC_MOVE_RANGE: 4665 return f2fs_ioc_move_range(filp, arg); 4666 case F2FS_IOC_FLUSH_DEVICE: 4667 return f2fs_ioc_flush_device(filp, arg); 4668 case F2FS_IOC_GET_FEATURES: 4669 return f2fs_ioc_get_features(filp, arg); 4670 case F2FS_IOC_GET_PIN_FILE: 4671 return f2fs_ioc_get_pin_file(filp, arg); 4672 case F2FS_IOC_SET_PIN_FILE: 4673 return f2fs_ioc_set_pin_file(filp, arg); 4674 case F2FS_IOC_PRECACHE_EXTENTS: 4675 return f2fs_ioc_precache_extents(filp); 4676 case F2FS_IOC_RESIZE_FS: 4677 return f2fs_ioc_resize_fs(filp, arg); 4678 case FS_IOC_ENABLE_VERITY: 4679 return f2fs_ioc_enable_verity(filp, arg); 4680 case FS_IOC_MEASURE_VERITY: 4681 return f2fs_ioc_measure_verity(filp, arg); 4682 case FS_IOC_READ_VERITY_METADATA: 4683 return f2fs_ioc_read_verity_metadata(filp, arg); 4684 case FS_IOC_GETFSLABEL: 4685 return f2fs_ioc_getfslabel(filp, arg); 4686 case FS_IOC_SETFSLABEL: 4687 return f2fs_ioc_setfslabel(filp, arg); 4688 case F2FS_IOC_GET_COMPRESS_BLOCKS: 4689 return f2fs_ioc_get_compress_blocks(filp, arg); 4690 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: 4691 return f2fs_release_compress_blocks(filp, arg); 4692 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: 4693 return f2fs_reserve_compress_blocks(filp, arg); 4694 case F2FS_IOC_SEC_TRIM_FILE: 4695 return f2fs_sec_trim_file(filp, arg); 4696 case F2FS_IOC_GET_COMPRESS_OPTION: 4697 return f2fs_ioc_get_compress_option(filp, arg); 4698 case F2FS_IOC_SET_COMPRESS_OPTION: 4699 return f2fs_ioc_set_compress_option(filp, arg); 4700 case F2FS_IOC_DECOMPRESS_FILE: 4701 return f2fs_ioc_decompress_file(filp); 4702 case F2FS_IOC_COMPRESS_FILE: 4703 return f2fs_ioc_compress_file(filp); 4704 case F2FS_IOC_GET_DEV_ALIAS_FILE: 4705 
return f2fs_ioc_get_dev_alias_file(filp, arg); 4706 case F2FS_IOC_IO_PRIO: 4707 return f2fs_ioc_io_prio(filp, arg); 4708 default: 4709 return -ENOTTY; 4710 } 4711 } 4712 4713 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 4714 { 4715 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) 4716 return -EIO; 4717 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp)))) 4718 return -ENOSPC; 4719 4720 return __f2fs_ioctl(filp, cmd, arg); 4721 } 4722 4723 /* 4724 * Return %true if the given read or write request should use direct I/O, or 4725 * %false if it should use buffered I/O. 4726 */ 4727 static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb, 4728 struct iov_iter *iter) 4729 { 4730 unsigned int align; 4731 4732 if (!(iocb->ki_flags & IOCB_DIRECT)) 4733 return false; 4734 4735 if (f2fs_force_buffered_io(inode, iov_iter_rw(iter))) 4736 return false; 4737 4738 /* 4739 * Direct I/O not aligned to the disk's logical_block_size will be 4740 * attempted, but will fail with -EINVAL. 4741 * 4742 * f2fs additionally requires that direct I/O be aligned to the 4743 * filesystem block size, which is often a stricter requirement. 4744 * However, f2fs traditionally falls back to buffered I/O on requests 4745 * that are logical_block_size-aligned but not fs-block aligned. 4746 * 4747 * The below logic implements this behavior. 
4748 */ 4749 align = iocb->ki_pos | iov_iter_alignment(iter); 4750 if (!IS_ALIGNED(align, i_blocksize(inode)) && 4751 IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev))) 4752 return false; 4753 4754 return true; 4755 } 4756 4757 static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error, 4758 unsigned int flags) 4759 { 4760 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); 4761 4762 dec_page_count(sbi, F2FS_DIO_READ); 4763 if (error) 4764 return error; 4765 f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size); 4766 return 0; 4767 } 4768 4769 static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = { 4770 .end_io = f2fs_dio_read_end_io, 4771 }; 4772 4773 static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) 4774 { 4775 struct file *file = iocb->ki_filp; 4776 struct inode *inode = file_inode(file); 4777 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4778 struct f2fs_inode_info *fi = F2FS_I(inode); 4779 const loff_t pos = iocb->ki_pos; 4780 const size_t count = iov_iter_count(to); 4781 struct iomap_dio *dio; 4782 ssize_t ret; 4783 4784 if (count == 0) 4785 return 0; /* skip atime update */ 4786 4787 trace_f2fs_direct_IO_enter(inode, iocb, count, READ); 4788 4789 if (iocb->ki_flags & IOCB_NOWAIT) { 4790 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { 4791 ret = -EAGAIN; 4792 goto out; 4793 } 4794 } else { 4795 f2fs_down_read(&fi->i_gc_rwsem[READ]); 4796 } 4797 4798 /* dio is not compatible w/ atomic file */ 4799 if (f2fs_is_atomic_file(inode)) { 4800 f2fs_up_read(&fi->i_gc_rwsem[READ]); 4801 ret = -EOPNOTSUPP; 4802 goto out; 4803 } 4804 4805 /* 4806 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of 4807 * the higher-level function iomap_dio_rw() in order to ensure that the 4808 * F2FS_DIO_READ counter will be decremented correctly in all cases. 
4809 */ 4810 inc_page_count(sbi, F2FS_DIO_READ); 4811 dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops, 4812 &f2fs_iomap_dio_read_ops, 0, NULL, 0); 4813 if (IS_ERR_OR_NULL(dio)) { 4814 ret = PTR_ERR_OR_ZERO(dio); 4815 if (ret != -EIOCBQUEUED) 4816 dec_page_count(sbi, F2FS_DIO_READ); 4817 } else { 4818 ret = iomap_dio_complete(dio); 4819 } 4820 4821 f2fs_up_read(&fi->i_gc_rwsem[READ]); 4822 4823 file_accessed(file); 4824 out: 4825 trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret); 4826 return ret; 4827 } 4828 4829 static void f2fs_trace_rw_file_path(struct file *file, loff_t pos, size_t count, 4830 int rw) 4831 { 4832 struct inode *inode = file_inode(file); 4833 char *buf, *path; 4834 4835 buf = f2fs_getname(F2FS_I_SB(inode)); 4836 if (!buf) 4837 return; 4838 path = dentry_path_raw(file_dentry(file), buf, PATH_MAX); 4839 if (IS_ERR(path)) 4840 goto free_buf; 4841 if (rw == WRITE) 4842 trace_f2fs_datawrite_start(inode, pos, count, 4843 current->pid, path, current->comm); 4844 else 4845 trace_f2fs_dataread_start(inode, pos, count, 4846 current->pid, path, current->comm); 4847 free_buf: 4848 f2fs_putname(buf); 4849 } 4850 4851 static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 4852 { 4853 struct inode *inode = file_inode(iocb->ki_filp); 4854 const loff_t pos = iocb->ki_pos; 4855 ssize_t ret; 4856 bool dio; 4857 4858 if (!f2fs_is_compress_backend_ready(inode)) 4859 return -EOPNOTSUPP; 4860 4861 if (trace_f2fs_dataread_start_enabled()) 4862 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, 4863 iov_iter_count(to), READ); 4864 4865 dio = f2fs_should_use_dio(inode, iocb, to); 4866 4867 /* In LFS mode, if there is inflight dio, wait for its completion */ 4868 if (f2fs_lfs_mode(F2FS_I_SB(inode)) && 4869 get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && 4870 (!f2fs_is_pinned_file(inode) || !dio)) 4871 inode_dio_wait(inode); 4872 4873 if (dio) { 4874 ret = f2fs_dio_read_iter(iocb, to); 4875 } else { 4876 ret = filemap_read(iocb, to, 0); 4877 if 
(ret > 0) 4878 f2fs_update_iostat(F2FS_I_SB(inode), inode, 4879 APP_BUFFERED_READ_IO, ret); 4880 } 4881 trace_f2fs_dataread_end(inode, pos, ret); 4882 return ret; 4883 } 4884 4885 static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos, 4886 struct pipe_inode_info *pipe, 4887 size_t len, unsigned int flags) 4888 { 4889 struct inode *inode = file_inode(in); 4890 const loff_t pos = *ppos; 4891 ssize_t ret; 4892 4893 if (!f2fs_is_compress_backend_ready(inode)) 4894 return -EOPNOTSUPP; 4895 4896 if (trace_f2fs_dataread_start_enabled()) 4897 f2fs_trace_rw_file_path(in, pos, len, READ); 4898 4899 ret = filemap_splice_read(in, ppos, pipe, len, flags); 4900 if (ret > 0) 4901 f2fs_update_iostat(F2FS_I_SB(inode), inode, 4902 APP_BUFFERED_READ_IO, ret); 4903 4904 trace_f2fs_dataread_end(inode, pos, ret); 4905 return ret; 4906 } 4907 4908 static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from) 4909 { 4910 struct file *file = iocb->ki_filp; 4911 struct inode *inode = file_inode(file); 4912 ssize_t count; 4913 int err; 4914 4915 if (IS_IMMUTABLE(inode)) 4916 return -EPERM; 4917 4918 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) 4919 return -EPERM; 4920 4921 count = generic_write_checks(iocb, from); 4922 if (count <= 0) 4923 return count; 4924 4925 err = file_modified(file); 4926 if (err) 4927 return err; 4928 4929 f2fs_zero_post_eof_page(inode, 4930 iocb->ki_pos + iov_iter_count(from), true); 4931 return count; 4932 } 4933 4934 /* 4935 * Preallocate blocks for a write request, if it is possible and helpful to do 4936 * so. Returns a positive number if blocks may have been preallocated, 0 if no 4937 * blocks were preallocated, or a negative errno value if something went 4938 * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the 4939 * requested blocks (not just some of them) have been allocated. 
4940 */ 4941 static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, 4942 bool dio) 4943 { 4944 struct inode *inode = file_inode(iocb->ki_filp); 4945 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4946 const loff_t pos = iocb->ki_pos; 4947 const size_t count = iov_iter_count(iter); 4948 struct f2fs_map_blocks map = {}; 4949 int flag; 4950 int ret; 4951 4952 /* If it will be an out-of-place direct write, don't bother. */ 4953 if (dio && f2fs_lfs_mode(sbi)) 4954 return 0; 4955 /* 4956 * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into 4957 * buffered IO, if DIO meets any holes. 4958 */ 4959 if (dio && i_size_read(inode) && 4960 (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode)))) 4961 return 0; 4962 4963 /* No-wait I/O can't allocate blocks. */ 4964 if (iocb->ki_flags & IOCB_NOWAIT) 4965 return 0; 4966 4967 /* If it will be a short write, don't bother. */ 4968 if (fault_in_iov_iter_readable(iter, count)) 4969 return 0; 4970 4971 if (f2fs_has_inline_data(inode)) { 4972 /* If the data will fit inline, don't bother. */ 4973 if (pos + count <= MAX_INLINE_DATA(inode)) 4974 return 0; 4975 ret = f2fs_convert_inline_inode(inode); 4976 if (ret) 4977 return ret; 4978 } 4979 4980 /* Do not preallocate blocks that will be written partially in 4KB. */ 4981 map.m_lblk = F2FS_BLK_ALIGN(pos); 4982 map.m_len = F2FS_BYTES_TO_BLK(pos + count); 4983 if (map.m_len > map.m_lblk) 4984 map.m_len -= map.m_lblk; 4985 else 4986 return 0; 4987 4988 if (!IS_DEVICE_ALIASING(inode)) 4989 map.m_may_create = true; 4990 if (dio) { 4991 map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, 4992 inode->i_write_hint); 4993 flag = F2FS_GET_BLOCK_PRE_DIO; 4994 } else { 4995 map.m_seg_type = NO_CHECK_TYPE; 4996 flag = F2FS_GET_BLOCK_PRE_AIO; 4997 } 4998 4999 ret = f2fs_map_blocks(inode, &map, flag); 5000 /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. 
*/ 5001 if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0)) 5002 return ret; 5003 if (ret == 0) 5004 set_inode_flag(inode, FI_PREALLOCATED_ALL); 5005 return map.m_len; 5006 } 5007 5008 static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb, 5009 struct iov_iter *from) 5010 { 5011 struct file *file = iocb->ki_filp; 5012 struct inode *inode = file_inode(file); 5013 ssize_t ret; 5014 5015 if (iocb->ki_flags & IOCB_NOWAIT) 5016 return -EOPNOTSUPP; 5017 5018 ret = generic_perform_write(iocb, from); 5019 5020 if (ret > 0) { 5021 f2fs_update_iostat(F2FS_I_SB(inode), inode, 5022 APP_BUFFERED_IO, ret); 5023 } 5024 return ret; 5025 } 5026 5027 static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, 5028 unsigned int flags) 5029 { 5030 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); 5031 5032 dec_page_count(sbi, F2FS_DIO_WRITE); 5033 if (error) 5034 return error; 5035 f2fs_update_time(sbi, REQ_TIME); 5036 f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size); 5037 return 0; 5038 } 5039 5040 static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, 5041 struct bio *bio, loff_t file_offset) 5042 { 5043 struct inode *inode = iter->inode; 5044 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 5045 enum log_type type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint); 5046 enum temp_type temp = f2fs_get_segment_temp(sbi, type); 5047 5048 bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp); 5049 blk_crypto_submit_bio(bio); 5050 } 5051 5052 static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = { 5053 .end_io = f2fs_dio_write_end_io, 5054 .submit_io = f2fs_dio_write_submit_io, 5055 }; 5056 5057 static void f2fs_flush_buffered_write(struct address_space *mapping, 5058 loff_t start_pos, loff_t end_pos) 5059 { 5060 int ret; 5061 5062 ret = filemap_write_and_wait_range(mapping, start_pos, end_pos); 5063 if (ret < 0) 5064 return; 5065 invalidate_mapping_pages(mapping, 5066 start_pos >> PAGE_SHIFT, 5067 
end_pos >> PAGE_SHIFT); 5068 } 5069 5070 static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, 5071 bool *may_need_sync) 5072 { 5073 struct file *file = iocb->ki_filp; 5074 struct inode *inode = file_inode(file); 5075 struct f2fs_inode_info *fi = F2FS_I(inode); 5076 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 5077 const bool do_opu = f2fs_lfs_mode(sbi); 5078 const loff_t pos = iocb->ki_pos; 5079 const ssize_t count = iov_iter_count(from); 5080 unsigned int dio_flags; 5081 struct iomap_dio *dio; 5082 ssize_t ret; 5083 5084 trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE); 5085 5086 if (iocb->ki_flags & IOCB_NOWAIT) { 5087 /* f2fs_convert_inline_inode() and block allocation can block */ 5088 if (f2fs_has_inline_data(inode) || 5089 !f2fs_overwrite_io(inode, pos, count)) { 5090 ret = -EAGAIN; 5091 goto out; 5092 } 5093 5094 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) { 5095 ret = -EAGAIN; 5096 goto out; 5097 } 5098 if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { 5099 f2fs_up_read(&fi->i_gc_rwsem[WRITE]); 5100 ret = -EAGAIN; 5101 goto out; 5102 } 5103 } else { 5104 ret = f2fs_convert_inline_inode(inode); 5105 if (ret) 5106 goto out; 5107 5108 f2fs_down_read(&fi->i_gc_rwsem[WRITE]); 5109 if (do_opu) 5110 f2fs_down_read(&fi->i_gc_rwsem[READ]); 5111 } 5112 5113 /* 5114 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of 5115 * the higher-level function iomap_dio_rw() in order to ensure that the 5116 * F2FS_DIO_WRITE counter will be decremented correctly in all cases. 
5117 */ 5118 inc_page_count(sbi, F2FS_DIO_WRITE); 5119 dio_flags = 0; 5120 if (pos + count > inode->i_size) 5121 dio_flags |= IOMAP_DIO_FORCE_WAIT; 5122 dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops, 5123 &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0); 5124 if (IS_ERR_OR_NULL(dio)) { 5125 ret = PTR_ERR_OR_ZERO(dio); 5126 if (ret == -ENOTBLK) 5127 ret = 0; 5128 if (ret != -EIOCBQUEUED) 5129 dec_page_count(sbi, F2FS_DIO_WRITE); 5130 } else { 5131 ret = iomap_dio_complete(dio); 5132 } 5133 5134 if (do_opu) 5135 f2fs_up_read(&fi->i_gc_rwsem[READ]); 5136 f2fs_up_read(&fi->i_gc_rwsem[WRITE]); 5137 5138 if (ret < 0) 5139 goto out; 5140 if (pos + ret > inode->i_size) 5141 f2fs_i_size_write(inode, pos + ret); 5142 if (!do_opu) 5143 set_inode_flag(inode, FI_UPDATE_WRITE); 5144 5145 if (iov_iter_count(from)) { 5146 ssize_t ret2; 5147 loff_t bufio_start_pos = iocb->ki_pos; 5148 5149 /* 5150 * The direct write was partial, so we need to fall back to a 5151 * buffered write for the remainder. 5152 */ 5153 5154 ret2 = f2fs_buffered_write_iter(iocb, from); 5155 if (iov_iter_count(from)) 5156 f2fs_write_failed(inode, iocb->ki_pos); 5157 if (ret2 < 0) 5158 goto out; 5159 5160 /* 5161 * Ensure that the pagecache pages are written to disk and 5162 * invalidated to preserve the expected O_DIRECT semantics. 5163 */ 5164 if (ret2 > 0) { 5165 loff_t bufio_end_pos = bufio_start_pos + ret2 - 1; 5166 5167 ret += ret2; 5168 5169 f2fs_flush_buffered_write(file->f_mapping, 5170 bufio_start_pos, 5171 bufio_end_pos); 5172 } 5173 } else { 5174 /* iomap_dio_rw() already handled the generic_write_sync(). 
*/ 5175 *may_need_sync = false; 5176 } 5177 out: 5178 trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret); 5179 return ret; 5180 } 5181 5182 static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 5183 { 5184 struct inode *inode = file_inode(iocb->ki_filp); 5185 const loff_t orig_pos = iocb->ki_pos; 5186 const size_t orig_count = iov_iter_count(from); 5187 loff_t target_size; 5188 bool dio; 5189 bool may_need_sync = true; 5190 int preallocated; 5191 const loff_t pos = iocb->ki_pos; 5192 const ssize_t count = iov_iter_count(from); 5193 ssize_t ret; 5194 5195 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { 5196 ret = -EIO; 5197 goto out; 5198 } 5199 5200 if (!f2fs_is_compress_backend_ready(inode)) { 5201 ret = -EOPNOTSUPP; 5202 goto out; 5203 } 5204 5205 if (iocb->ki_flags & IOCB_NOWAIT) { 5206 if (!inode_trylock(inode)) { 5207 ret = -EAGAIN; 5208 goto out; 5209 } 5210 } else { 5211 inode_lock(inode); 5212 } 5213 5214 if (f2fs_is_pinned_file(inode) && 5215 !f2fs_overwrite_io(inode, pos, count)) { 5216 ret = -EIO; 5217 goto out_unlock; 5218 } 5219 5220 ret = f2fs_write_checks(iocb, from); 5221 if (ret <= 0) 5222 goto out_unlock; 5223 5224 /* Determine whether we will do a direct write or a buffered write. */ 5225 dio = f2fs_should_use_dio(inode, iocb, from); 5226 5227 /* dio is not compatible w/ atomic write */ 5228 if (dio && f2fs_is_atomic_file(inode)) { 5229 ret = -EOPNOTSUPP; 5230 goto out_unlock; 5231 } 5232 5233 /* Possibly preallocate the blocks for the write. */ 5234 target_size = iocb->ki_pos + iov_iter_count(from); 5235 preallocated = f2fs_preallocate_blocks(iocb, from, dio); 5236 if (preallocated < 0) { 5237 ret = preallocated; 5238 } else { 5239 if (trace_f2fs_datawrite_start_enabled()) 5240 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, 5241 orig_count, WRITE); 5242 5243 /* Do the actual write. */ 5244 ret = dio ? 
5245 f2fs_dio_write_iter(iocb, from, &may_need_sync) : 5246 f2fs_buffered_write_iter(iocb, from); 5247 5248 trace_f2fs_datawrite_end(inode, orig_pos, ret); 5249 } 5250 5251 /* Don't leave any preallocated blocks around past i_size. */ 5252 if (preallocated && i_size_read(inode) < target_size) { 5253 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 5254 filemap_invalidate_lock(inode->i_mapping); 5255 if (!f2fs_truncate(inode)) 5256 file_dont_truncate(inode); 5257 filemap_invalidate_unlock(inode->i_mapping); 5258 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 5259 } else { 5260 file_dont_truncate(inode); 5261 } 5262 5263 clear_inode_flag(inode, FI_PREALLOCATED_ALL); 5264 out_unlock: 5265 inode_unlock(inode); 5266 out: 5267 trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret); 5268 5269 if (ret > 0 && may_need_sync) 5270 ret = generic_write_sync(iocb, ret); 5271 5272 /* If buffered IO was forced, flush and drop the data from 5273 * the page cache to preserve O_DIRECT semantics 5274 */ 5275 if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT)) 5276 f2fs_flush_buffered_write(iocb->ki_filp->f_mapping, 5277 orig_pos, 5278 orig_pos + ret - 1); 5279 5280 return ret; 5281 } 5282 5283 static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len, 5284 int advice) 5285 { 5286 struct address_space *mapping; 5287 struct backing_dev_info *bdi; 5288 struct inode *inode = file_inode(filp); 5289 int err; 5290 5291 trace_f2fs_fadvise(inode, offset, len, advice); 5292 5293 if (advice == POSIX_FADV_SEQUENTIAL) { 5294 if (S_ISFIFO(inode->i_mode)) 5295 return -ESPIPE; 5296 5297 mapping = filp->f_mapping; 5298 if (!mapping || len < 0) 5299 return -EINVAL; 5300 5301 bdi = inode_to_bdi(mapping->host); 5302 filp->f_ra.ra_pages = bdi->ra_pages * 5303 F2FS_I_SB(inode)->seq_file_ra_mul; 5304 spin_lock(&filp->f_lock); 5305 filp->f_mode &= ~FMODE_RANDOM; 5306 spin_unlock(&filp->f_lock); 5307 return 0; 5308 } else if (advice == POSIX_FADV_WILLNEED && offset == 0) { 5309 
/* Load extent cache at the first readahead. */ 5310 f2fs_precache_extents(inode); 5311 } 5312 5313 err = generic_fadvise(filp, offset, len, advice); 5314 if (err) 5315 return err; 5316 5317 if (advice == POSIX_FADV_DONTNEED && 5318 (test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) && 5319 f2fs_compressed_file(inode))) 5320 f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino); 5321 else if (advice == POSIX_FADV_NOREUSE) 5322 err = f2fs_keep_noreuse_range(inode, offset, len); 5323 return err; 5324 } 5325 5326 #ifdef CONFIG_COMPAT 5327 struct compat_f2fs_gc_range { 5328 u32 sync; 5329 compat_u64 start; 5330 compat_u64 len; 5331 }; 5332 #define F2FS_IOC32_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11,\ 5333 struct compat_f2fs_gc_range) 5334 5335 static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg) 5336 { 5337 struct compat_f2fs_gc_range __user *urange; 5338 struct f2fs_gc_range range; 5339 int err; 5340 5341 urange = compat_ptr(arg); 5342 err = get_user(range.sync, &urange->sync); 5343 err |= get_user(range.start, &urange->start); 5344 err |= get_user(range.len, &urange->len); 5345 if (err) 5346 return -EFAULT; 5347 5348 return __f2fs_ioc_gc_range(file, &range); 5349 } 5350 5351 struct compat_f2fs_move_range { 5352 u32 dst_fd; 5353 compat_u64 pos_in; 5354 compat_u64 pos_out; 5355 compat_u64 len; 5356 }; 5357 #define F2FS_IOC32_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ 5358 struct compat_f2fs_move_range) 5359 5360 static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg) 5361 { 5362 struct compat_f2fs_move_range __user *urange; 5363 struct f2fs_move_range range; 5364 int err; 5365 5366 urange = compat_ptr(arg); 5367 err = get_user(range.dst_fd, &urange->dst_fd); 5368 err |= get_user(range.pos_in, &urange->pos_in); 5369 err |= get_user(range.pos_out, &urange->pos_out); 5370 err |= get_user(range.len, &urange->len); 5371 if (err) 5372 return -EFAULT; 5373 5374 return __f2fs_ioc_move_range(file, &range); 5375 } 5376 5377 long 
f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 5378 { 5379 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) 5380 return -EIO; 5381 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file)))) 5382 return -ENOSPC; 5383 5384 switch (cmd) { 5385 case FS_IOC32_GETVERSION: 5386 cmd = FS_IOC_GETVERSION; 5387 break; 5388 case F2FS_IOC32_GARBAGE_COLLECT_RANGE: 5389 return f2fs_compat_ioc_gc_range(file, arg); 5390 case F2FS_IOC32_MOVE_RANGE: 5391 return f2fs_compat_ioc_move_range(file, arg); 5392 case F2FS_IOC_START_ATOMIC_WRITE: 5393 case F2FS_IOC_START_ATOMIC_REPLACE: 5394 case F2FS_IOC_COMMIT_ATOMIC_WRITE: 5395 case F2FS_IOC_START_VOLATILE_WRITE: 5396 case F2FS_IOC_RELEASE_VOLATILE_WRITE: 5397 case F2FS_IOC_ABORT_ATOMIC_WRITE: 5398 case F2FS_IOC_SHUTDOWN: 5399 case FITRIM: 5400 case FS_IOC_SET_ENCRYPTION_POLICY: 5401 case FS_IOC_GET_ENCRYPTION_PWSALT: 5402 case FS_IOC_GET_ENCRYPTION_POLICY: 5403 case FS_IOC_GET_ENCRYPTION_POLICY_EX: 5404 case FS_IOC_ADD_ENCRYPTION_KEY: 5405 case FS_IOC_REMOVE_ENCRYPTION_KEY: 5406 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: 5407 case FS_IOC_GET_ENCRYPTION_KEY_STATUS: 5408 case FS_IOC_GET_ENCRYPTION_NONCE: 5409 case F2FS_IOC_GARBAGE_COLLECT: 5410 case F2FS_IOC_WRITE_CHECKPOINT: 5411 case F2FS_IOC_DEFRAGMENT: 5412 case F2FS_IOC_FLUSH_DEVICE: 5413 case F2FS_IOC_GET_FEATURES: 5414 case F2FS_IOC_GET_PIN_FILE: 5415 case F2FS_IOC_SET_PIN_FILE: 5416 case F2FS_IOC_PRECACHE_EXTENTS: 5417 case F2FS_IOC_RESIZE_FS: 5418 case FS_IOC_ENABLE_VERITY: 5419 case FS_IOC_MEASURE_VERITY: 5420 case FS_IOC_READ_VERITY_METADATA: 5421 case FS_IOC_GETFSLABEL: 5422 case FS_IOC_SETFSLABEL: 5423 case F2FS_IOC_GET_COMPRESS_BLOCKS: 5424 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: 5425 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: 5426 case F2FS_IOC_SEC_TRIM_FILE: 5427 case F2FS_IOC_GET_COMPRESS_OPTION: 5428 case F2FS_IOC_SET_COMPRESS_OPTION: 5429 case F2FS_IOC_DECOMPRESS_FILE: 5430 case F2FS_IOC_COMPRESS_FILE: 5431 case 
F2FS_IOC_GET_DEV_ALIAS_FILE: 5432 case F2FS_IOC_IO_PRIO: 5433 break; 5434 default: 5435 return -ENOIOCTLCMD; 5436 } 5437 return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); 5438 } 5439 #endif 5440 5441 const struct file_operations f2fs_file_operations = { 5442 .llseek = f2fs_llseek, 5443 .read_iter = f2fs_file_read_iter, 5444 .write_iter = f2fs_file_write_iter, 5445 .iopoll = iocb_bio_iopoll, 5446 .open = f2fs_file_open, 5447 .release = f2fs_release_file, 5448 .mmap_prepare = f2fs_file_mmap_prepare, 5449 .flush = f2fs_file_flush, 5450 .fsync = f2fs_sync_file, 5451 .fallocate = f2fs_fallocate, 5452 .unlocked_ioctl = f2fs_ioctl, 5453 #ifdef CONFIG_COMPAT 5454 .compat_ioctl = f2fs_compat_ioctl, 5455 #endif 5456 .splice_read = f2fs_file_splice_read, 5457 .splice_write = iter_file_splice_write, 5458 .fadvise = f2fs_file_fadvise, 5459 .fop_flags = FOP_BUFFER_RASYNC, 5460 .setlease = generic_setlease, 5461 }; 5462