1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * fs/f2fs/file.c 4 * 5 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 6 * http://www.samsung.com/ 7 */ 8 #include <linux/blk-crypto.h> 9 #include <linux/fs.h> 10 #include <linux/f2fs_fs.h> 11 #include <linux/stat.h> 12 #include <linux/writeback.h> 13 #include <linux/blkdev.h> 14 #include <linux/falloc.h> 15 #include <linux/filelock.h> 16 #include <linux/types.h> 17 #include <linux/compat.h> 18 #include <linux/uaccess.h> 19 #include <linux/mount.h> 20 #include <linux/pagevec.h> 21 #include <linux/uio.h> 22 #include <linux/uuid.h> 23 #include <linux/file.h> 24 #include <linux/nls.h> 25 #include <linux/sched/signal.h> 26 #include <linux/fileattr.h> 27 #include <linux/fadvise.h> 28 #include <linux/iomap.h> 29 30 #include "f2fs.h" 31 #include "node.h" 32 #include "segment.h" 33 #include "xattr.h" 34 #include "acl.h" 35 #include "gc.h" 36 #include "iostat.h" 37 #include <trace/events/f2fs.h> 38 #include <uapi/linux/f2fs.h> 39 40 static void f2fs_zero_post_eof_page(struct inode *inode, 41 loff_t new_size, bool lock) 42 { 43 loff_t old_size = i_size_read(inode); 44 45 if (old_size >= new_size) 46 return; 47 48 if (mapping_empty(inode->i_mapping)) 49 return; 50 51 if (lock) 52 filemap_invalidate_lock(inode->i_mapping); 53 /* zero or drop pages only in range of [old_size, new_size] */ 54 truncate_inode_pages_range(inode->i_mapping, old_size, new_size); 55 if (lock) 56 filemap_invalidate_unlock(inode->i_mapping); 57 } 58 59 static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) 60 { 61 struct inode *inode = file_inode(vmf->vma->vm_file); 62 vm_flags_t flags = vmf->vma->vm_flags; 63 vm_fault_t ret; 64 65 ret = filemap_fault(vmf); 66 if (ret & VM_FAULT_LOCKED) 67 f2fs_update_iostat(F2FS_I_SB(inode), inode, 68 APP_MAPPED_READ_IO, F2FS_BLKSIZE); 69 70 trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret); 71 72 return ret; 73 } 74 75 static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) 76 { 77 struct folio *folio = 
page_folio(vmf->page); 78 struct inode *inode = file_inode(vmf->vma->vm_file); 79 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 80 struct dnode_of_data dn; 81 bool need_alloc = !f2fs_is_pinned_file(inode); 82 int err = 0; 83 vm_fault_t ret; 84 85 if (unlikely(IS_IMMUTABLE(inode))) 86 return VM_FAULT_SIGBUS; 87 88 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 89 err = -EIO; 90 goto out; 91 } 92 93 if (unlikely(f2fs_cp_error(sbi))) { 94 err = -EIO; 95 goto out; 96 } 97 98 if (!f2fs_is_checkpoint_ready(sbi)) { 99 err = -ENOSPC; 100 goto out; 101 } 102 103 err = f2fs_convert_inline_inode(inode); 104 if (err) 105 goto out; 106 107 #ifdef CONFIG_F2FS_FS_COMPRESSION 108 if (f2fs_compressed_file(inode)) { 109 int ret = f2fs_is_compressed_cluster(inode, folio->index); 110 111 if (ret < 0) { 112 err = ret; 113 goto out; 114 } else if (ret) { 115 need_alloc = false; 116 } 117 } 118 #endif 119 /* should do out of any locked page */ 120 if (need_alloc) 121 f2fs_balance_fs(sbi, true); 122 123 sb_start_pagefault(inode->i_sb); 124 125 f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); 126 127 f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT, true); 128 129 file_update_time(vmf->vma->vm_file); 130 filemap_invalidate_lock_shared(inode->i_mapping); 131 132 folio_lock(folio); 133 if (unlikely(folio->mapping != inode->i_mapping || 134 folio_pos(folio) > i_size_read(inode) || 135 !folio_test_uptodate(folio))) { 136 folio_unlock(folio); 137 err = -EFAULT; 138 goto out_sem; 139 } 140 141 set_new_dnode(&dn, inode, NULL, NULL, 0); 142 if (need_alloc) { 143 /* block allocation */ 144 err = f2fs_get_block_locked(&dn, folio->index); 145 } else { 146 err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); 147 f2fs_put_dnode(&dn); 148 if (f2fs_is_pinned_file(inode) && 149 !__is_valid_data_blkaddr(dn.data_blkaddr)) 150 err = -EIO; 151 } 152 153 if (err) { 154 folio_unlock(folio); 155 goto out_sem; 156 } 157 158 f2fs_folio_wait_writeback(folio, DATA, false, true); 159 160 
/* wait for GCed page writeback via META_MAPPING */ 161 f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); 162 163 /* 164 * check to see if the page is mapped already (no holes) 165 */ 166 if (folio_test_mappedtodisk(folio)) 167 goto out_sem; 168 169 /* page is wholly or partially inside EOF */ 170 if (((loff_t)(folio->index + 1) << PAGE_SHIFT) > 171 i_size_read(inode)) { 172 loff_t offset; 173 174 offset = i_size_read(inode) & ~PAGE_MASK; 175 folio_zero_segment(folio, offset, folio_size(folio)); 176 } 177 folio_mark_dirty(folio); 178 179 f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE); 180 f2fs_update_time(sbi, REQ_TIME); 181 182 out_sem: 183 filemap_invalidate_unlock_shared(inode->i_mapping); 184 185 sb_end_pagefault(inode->i_sb); 186 out: 187 ret = vmf_fs_error(err); 188 189 trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret); 190 return ret; 191 } 192 193 static const struct vm_operations_struct f2fs_file_vm_ops = { 194 .fault = f2fs_filemap_fault, 195 .map_pages = filemap_map_pages, 196 .page_mkwrite = f2fs_vm_page_mkwrite, 197 }; 198 199 static int get_parent_ino(struct inode *inode, nid_t *pino) 200 { 201 struct dentry *dentry; 202 203 /* 204 * Make sure to get the non-deleted alias. The alias associated with 205 * the open file descriptor being fsync()'ed may be deleted already. 
206 */ 207 dentry = d_find_alias(inode); 208 if (!dentry) 209 return 0; 210 211 *pino = d_parent_ino(dentry); 212 dput(dentry); 213 return 1; 214 } 215 216 static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) 217 { 218 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 219 enum cp_reason_type cp_reason = CP_NO_NEEDED; 220 221 if (!S_ISREG(inode->i_mode)) 222 cp_reason = CP_NON_REGULAR; 223 else if (f2fs_compressed_file(inode)) 224 cp_reason = CP_COMPRESSED; 225 else if (inode->i_nlink != 1) 226 cp_reason = CP_HARDLINK; 227 else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) 228 cp_reason = CP_SB_NEED_CP; 229 else if (file_wrong_pino(inode)) 230 cp_reason = CP_WRONG_PINO; 231 else if (!f2fs_space_for_roll_forward(sbi)) 232 cp_reason = CP_NO_SPC_ROLL; 233 else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) 234 cp_reason = CP_NODE_NEED_CP; 235 else if (test_opt(sbi, FASTBOOT)) 236 cp_reason = CP_FASTBOOT_MODE; 237 else if (F2FS_OPTION(sbi).active_logs == 2) 238 cp_reason = CP_SPEC_LOG_NUM; 239 else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && 240 f2fs_need_dentry_mark(sbi, inode->i_ino) && 241 f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, 242 TRANS_DIR_INO)) 243 cp_reason = CP_RECOVER_DIR; 244 else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, 245 XATTR_DIR_INO)) 246 cp_reason = CP_XATTR_DIR; 247 248 return cp_reason; 249 } 250 251 static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) 252 { 253 struct folio *i = filemap_get_folio(NODE_MAPPING(sbi), ino); 254 bool ret = false; 255 /* But we need to avoid that there are some inode updates */ 256 if ((!IS_ERR(i) && folio_test_dirty(i)) || 257 f2fs_need_inode_block_update(sbi, ino)) 258 ret = true; 259 f2fs_folio_put(i, false); 260 return ret; 261 } 262 263 static void try_to_fix_pino(struct inode *inode) 264 { 265 struct f2fs_inode_info *fi = F2FS_I(inode); 266 nid_t pino; 267 268 f2fs_down_write(&fi->i_sem); 269 if (file_wrong_pino(inode) && inode->i_nlink 
== 1 && 270 get_parent_ino(inode, &pino)) { 271 f2fs_i_pino_write(inode, pino); 272 file_got_pino(inode); 273 } 274 f2fs_up_write(&fi->i_sem); 275 } 276 277 static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, 278 int datasync, bool atomic) 279 { 280 struct inode *inode = file->f_mapping->host; 281 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 282 nid_t ino = inode->i_ino; 283 int ret = 0; 284 enum cp_reason_type cp_reason = 0; 285 struct writeback_control wbc = { 286 .sync_mode = WB_SYNC_ALL, 287 .nr_to_write = LONG_MAX, 288 }; 289 unsigned int seq_id = 0; 290 291 if (unlikely(f2fs_readonly(inode->i_sb))) 292 return 0; 293 294 trace_f2fs_sync_file_enter(inode); 295 296 if (S_ISDIR(inode->i_mode)) 297 goto go_write; 298 299 /* if fdatasync is triggered, let's do in-place-update */ 300 if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) 301 set_inode_flag(inode, FI_NEED_IPU); 302 ret = file_write_and_wait_range(file, start, end); 303 clear_inode_flag(inode, FI_NEED_IPU); 304 305 if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { 306 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); 307 return ret; 308 } 309 310 /* if the inode is dirty, let's recover all the time */ 311 if (!f2fs_skip_inode_update(inode, datasync)) { 312 f2fs_write_inode(inode, NULL); 313 goto go_write; 314 } 315 316 /* 317 * if there is no written data, don't waste time to write recovery info. 318 */ 319 if (!is_inode_flag_set(inode, FI_APPEND_WRITE) && 320 !f2fs_exist_written_data(sbi, ino, APPEND_INO)) { 321 322 /* it may call write_inode just prior to fsync */ 323 if (need_inode_page_update(sbi, ino)) 324 goto go_write; 325 326 if (is_inode_flag_set(inode, FI_UPDATE_WRITE) || 327 f2fs_exist_written_data(sbi, ino, UPDATE_INO)) 328 goto flush_out; 329 goto out; 330 } else { 331 /* 332 * for OPU case, during fsync(), node can be persisted before 333 * data when lower device doesn't support write barrier, result 334 * in data corruption after SPO. 
335 * So for strict fsync mode, force to use atomic write semantics 336 * to keep write order in between data/node and last node to 337 * avoid potential data corruption. 338 */ 339 if (F2FS_OPTION(sbi).fsync_mode == 340 FSYNC_MODE_STRICT && !atomic) 341 atomic = true; 342 } 343 go_write: 344 /* 345 * Both of fdatasync() and fsync() are able to be recovered from 346 * sudden-power-off. 347 */ 348 f2fs_down_read(&F2FS_I(inode)->i_sem); 349 cp_reason = need_do_checkpoint(inode); 350 f2fs_up_read(&F2FS_I(inode)->i_sem); 351 352 if (cp_reason) { 353 /* all the dirty node pages should be flushed for POR */ 354 ret = f2fs_sync_fs(inode->i_sb, 1); 355 356 /* 357 * We've secured consistency through sync_fs. Following pino 358 * will be used only for fsynced inodes after checkpoint. 359 */ 360 try_to_fix_pino(inode); 361 clear_inode_flag(inode, FI_APPEND_WRITE); 362 clear_inode_flag(inode, FI_UPDATE_WRITE); 363 goto out; 364 } 365 sync_nodes: 366 atomic_inc(&sbi->wb_sync_req[NODE]); 367 ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id); 368 atomic_dec(&sbi->wb_sync_req[NODE]); 369 if (ret) 370 goto out; 371 372 /* if cp_error was enabled, we should avoid infinite loop */ 373 if (unlikely(f2fs_cp_error(sbi))) { 374 ret = -EIO; 375 goto out; 376 } 377 378 if (f2fs_need_inode_block_update(sbi, ino)) { 379 f2fs_mark_inode_dirty_sync(inode, true); 380 f2fs_write_inode(inode, NULL); 381 goto sync_nodes; 382 } 383 384 /* 385 * If it's atomic_write, it's just fine to keep write ordering. So 386 * here we don't need to wait for node write completion, since we use 387 * node chain which serializes node blocks. If one of node writes are 388 * reordered, we can see simply broken chain, resulting in stopping 389 * roll-forward recovery. It means we'll recover all or none node blocks 390 * given fsync mark. 
391 */ 392 if (!atomic) { 393 ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id); 394 if (ret) 395 goto out; 396 } 397 398 /* once recovery info is written, don't need to tack this */ 399 f2fs_remove_ino_entry(sbi, ino, APPEND_INO); 400 clear_inode_flag(inode, FI_APPEND_WRITE); 401 flush_out: 402 if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) 403 ret = f2fs_issue_flush(sbi, inode->i_ino); 404 if (!ret) { 405 f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); 406 clear_inode_flag(inode, FI_UPDATE_WRITE); 407 f2fs_remove_ino_entry(sbi, ino, FLUSH_INO); 408 } 409 f2fs_update_time(sbi, REQ_TIME); 410 out: 411 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); 412 return ret; 413 } 414 415 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 416 { 417 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) 418 return -EIO; 419 return f2fs_do_sync_file(file, start, end, datasync, false); 420 } 421 422 static bool __found_offset(struct address_space *mapping, 423 struct dnode_of_data *dn, pgoff_t index, int whence) 424 { 425 block_t blkaddr = f2fs_data_blkaddr(dn); 426 struct inode *inode = mapping->host; 427 bool compressed_cluster = false; 428 429 if (f2fs_compressed_file(inode)) { 430 block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_folio, 431 ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size)); 432 433 compressed_cluster = first_blkaddr == COMPRESS_ADDR; 434 } 435 436 switch (whence) { 437 case SEEK_DATA: 438 if (__is_valid_data_blkaddr(blkaddr)) 439 return true; 440 if (blkaddr == NEW_ADDR && 441 xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY)) 442 return true; 443 if (compressed_cluster) 444 return true; 445 break; 446 case SEEK_HOLE: 447 if (compressed_cluster) 448 return false; 449 if (blkaddr == NULL_ADDR) 450 return true; 451 break; 452 } 453 return false; 454 } 455 456 static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) 457 { 458 struct inode *inode = 
file->f_mapping->host; 459 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 460 struct dnode_of_data dn; 461 pgoff_t pgofs, end_offset; 462 loff_t data_ofs = offset; 463 loff_t isize; 464 int err = 0; 465 466 inode_lock_shared(inode); 467 468 isize = i_size_read(inode); 469 if (offset >= isize) 470 goto fail; 471 472 /* handle inline data case */ 473 if (f2fs_has_inline_data(inode)) { 474 if (whence == SEEK_HOLE) { 475 data_ofs = isize; 476 goto found; 477 } else if (whence == SEEK_DATA) { 478 data_ofs = offset; 479 goto found; 480 } 481 } 482 483 pgofs = (pgoff_t)(offset >> PAGE_SHIFT); 484 485 for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) { 486 set_new_dnode(&dn, inode, NULL, NULL, 0); 487 err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); 488 if (err && err != -ENOENT) { 489 goto fail; 490 } else if (err == -ENOENT) { 491 /* direct node does not exists */ 492 if (whence == SEEK_DATA) { 493 pgofs = f2fs_get_next_page_offset(&dn, pgofs); 494 continue; 495 } else { 496 goto found; 497 } 498 } 499 500 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 501 502 /* find data/hole in dnode block */ 503 for (; dn.ofs_in_node < end_offset; 504 dn.ofs_in_node++, pgofs++, 505 data_ofs = (loff_t)pgofs << PAGE_SHIFT) { 506 block_t blkaddr; 507 508 blkaddr = f2fs_data_blkaddr(&dn); 509 510 if (__is_valid_data_blkaddr(blkaddr) && 511 !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), 512 blkaddr, DATA_GENERIC_ENHANCE)) { 513 f2fs_put_dnode(&dn); 514 goto fail; 515 } 516 517 if (__found_offset(file->f_mapping, &dn, 518 pgofs, whence)) { 519 f2fs_put_dnode(&dn); 520 goto found; 521 } 522 } 523 f2fs_put_dnode(&dn); 524 } 525 526 if (whence == SEEK_DATA) 527 goto fail; 528 found: 529 if (whence == SEEK_HOLE && data_ofs > isize) 530 data_ofs = isize; 531 inode_unlock_shared(inode); 532 return vfs_setpos(file, data_ofs, maxbytes); 533 fail: 534 inode_unlock_shared(inode); 535 return -ENXIO; 536 } 537 538 static loff_t f2fs_llseek(struct file *file, loff_t 
offset, int whence) 539 { 540 struct inode *inode = file->f_mapping->host; 541 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 542 543 switch (whence) { 544 case SEEK_SET: 545 case SEEK_CUR: 546 case SEEK_END: 547 return generic_file_llseek_size(file, offset, whence, 548 maxbytes, i_size_read(inode)); 549 case SEEK_DATA: 550 case SEEK_HOLE: 551 if (offset < 0) 552 return -ENXIO; 553 return f2fs_seek_block(file, offset, whence); 554 } 555 556 return -EINVAL; 557 } 558 559 static int f2fs_file_mmap_prepare(struct vm_area_desc *desc) 560 { 561 struct file *file = desc->file; 562 struct inode *inode = file_inode(file); 563 564 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 565 return -EIO; 566 567 if (!f2fs_is_compress_backend_ready(inode)) 568 return -EOPNOTSUPP; 569 570 file_accessed(file); 571 desc->vm_ops = &f2fs_file_vm_ops; 572 573 f2fs_down_read(&F2FS_I(inode)->i_sem); 574 set_inode_flag(inode, FI_MMAP_FILE); 575 f2fs_up_read(&F2FS_I(inode)->i_sem); 576 577 return 0; 578 } 579 580 static int finish_preallocate_blocks(struct inode *inode) 581 { 582 int ret = 0; 583 bool opened; 584 585 f2fs_down_read(&F2FS_I(inode)->i_sem); 586 opened = is_inode_flag_set(inode, FI_OPENED_FILE); 587 f2fs_up_read(&F2FS_I(inode)->i_sem); 588 if (opened) 589 return 0; 590 591 inode_lock(inode); 592 if (is_inode_flag_set(inode, FI_OPENED_FILE)) 593 goto out_unlock; 594 595 if (!file_should_truncate(inode)) 596 goto out_update; 597 598 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 599 filemap_invalidate_lock(inode->i_mapping); 600 601 truncate_setsize(inode, i_size_read(inode)); 602 ret = f2fs_truncate(inode); 603 604 filemap_invalidate_unlock(inode->i_mapping); 605 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 606 if (ret) 607 goto out_unlock; 608 609 file_dont_truncate(inode); 610 out_update: 611 f2fs_down_write(&F2FS_I(inode)->i_sem); 612 set_inode_flag(inode, FI_OPENED_FILE); 613 f2fs_up_write(&F2FS_I(inode)->i_sem); 614 out_unlock: 615 inode_unlock(inode); 
616 return ret; 617 } 618 619 static int f2fs_file_open(struct inode *inode, struct file *filp) 620 { 621 int err = fscrypt_file_open(inode, filp); 622 623 if (err) 624 return err; 625 626 if (!f2fs_is_compress_backend_ready(inode)) 627 return -EOPNOTSUPP; 628 629 err = fsverity_file_open(inode, filp); 630 if (err) 631 return err; 632 633 filp->f_mode |= FMODE_NOWAIT; 634 filp->f_mode |= FMODE_CAN_ODIRECT; 635 636 err = dquot_file_open(inode, filp); 637 if (err) 638 return err; 639 640 err = finish_preallocate_blocks(inode); 641 if (!err) 642 atomic_inc(&F2FS_I(inode)->open_count); 643 return err; 644 } 645 646 void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) 647 { 648 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 649 int nr_free = 0, ofs = dn->ofs_in_node, len = count; 650 __le32 *addr; 651 bool compressed_cluster = false; 652 int cluster_index = 0, valid_blocks = 0; 653 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 654 bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks); 655 block_t blkstart; 656 int blklen = 0; 657 658 addr = get_dnode_addr(dn->inode, dn->node_folio) + ofs; 659 blkstart = le32_to_cpu(*addr); 660 661 /* Assumption: truncation starts with cluster */ 662 for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { 663 block_t blkaddr = le32_to_cpu(*addr); 664 665 if (f2fs_compressed_file(dn->inode) && 666 !(cluster_index & (cluster_size - 1))) { 667 if (compressed_cluster) 668 f2fs_i_compr_blocks_update(dn->inode, 669 valid_blocks, false); 670 compressed_cluster = (blkaddr == COMPRESS_ADDR); 671 valid_blocks = 0; 672 } 673 674 if (blkaddr == NULL_ADDR) 675 goto next; 676 677 f2fs_set_data_blkaddr(dn, NULL_ADDR); 678 679 if (__is_valid_data_blkaddr(blkaddr)) { 680 if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE)) 681 goto next; 682 if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr, 683 DATA_GENERIC_ENHANCE)) 684 goto next; 685 if (compressed_cluster) 686 valid_blocks++; 687 } 688 689 if 
(blkstart + blklen == blkaddr) { 690 blklen++; 691 } else { 692 f2fs_invalidate_blocks(sbi, blkstart, blklen); 693 blkstart = blkaddr; 694 blklen = 1; 695 } 696 697 if (!released || blkaddr != COMPRESS_ADDR) 698 nr_free++; 699 700 continue; 701 702 next: 703 if (blklen) 704 f2fs_invalidate_blocks(sbi, blkstart, blklen); 705 706 blkstart = le32_to_cpu(*(addr + 1)); 707 blklen = 0; 708 } 709 710 if (blklen) 711 f2fs_invalidate_blocks(sbi, blkstart, blklen); 712 713 if (compressed_cluster) 714 f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false); 715 716 if (nr_free) { 717 pgoff_t fofs; 718 /* 719 * once we invalidate valid blkaddr in range [ofs, ofs + count], 720 * we will invalidate all blkaddr in the whole range. 721 */ 722 fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), 723 dn->inode) + ofs; 724 f2fs_update_read_extent_cache_range(dn, fofs, 0, len); 725 f2fs_update_age_extent_cache_range(dn, fofs, len); 726 dec_valid_block_count(sbi, dn->inode, nr_free); 727 } 728 dn->ofs_in_node = ofs; 729 730 f2fs_update_time(sbi, REQ_TIME); 731 trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, 732 dn->ofs_in_node, nr_free); 733 } 734 735 static int truncate_partial_data_page(struct inode *inode, u64 from, 736 bool cache_only) 737 { 738 loff_t offset = from & (PAGE_SIZE - 1); 739 pgoff_t index = from >> PAGE_SHIFT; 740 struct address_space *mapping = inode->i_mapping; 741 struct folio *folio; 742 743 if (!offset && !cache_only) 744 return 0; 745 746 if (cache_only) { 747 folio = filemap_lock_folio(mapping, index); 748 if (IS_ERR(folio)) 749 return 0; 750 if (folio_test_uptodate(folio)) 751 goto truncate_out; 752 f2fs_folio_put(folio, true); 753 return 0; 754 } 755 756 folio = f2fs_get_lock_data_folio(inode, index, true); 757 if (IS_ERR(folio)) 758 return PTR_ERR(folio) == -ENOENT ? 
0 : PTR_ERR(folio); 759 truncate_out: 760 f2fs_folio_wait_writeback(folio, DATA, true, true); 761 folio_zero_segment(folio, offset, folio_size(folio)); 762 763 /* An encrypted inode should have a key and truncate the last page. */ 764 f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode)); 765 if (!cache_only) 766 folio_mark_dirty(folio); 767 f2fs_folio_put(folio, true); 768 return 0; 769 } 770 771 int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) 772 { 773 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 774 struct dnode_of_data dn; 775 pgoff_t free_from; 776 int count = 0, err = 0; 777 struct folio *ifolio; 778 bool truncate_page = false; 779 780 trace_f2fs_truncate_blocks_enter(inode, from); 781 782 if (IS_DEVICE_ALIASING(inode) && from) { 783 err = -EINVAL; 784 goto out_err; 785 } 786 787 free_from = (pgoff_t)F2FS_BLK_ALIGN(from); 788 789 if (free_from >= max_file_blocks(inode)) 790 goto free_partial; 791 792 if (lock) 793 f2fs_lock_op(sbi); 794 795 ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); 796 if (IS_ERR(ifolio)) { 797 err = PTR_ERR(ifolio); 798 goto out; 799 } 800 801 if (IS_DEVICE_ALIASING(inode)) { 802 struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; 803 struct extent_info ei = et->largest; 804 805 f2fs_invalidate_blocks(sbi, ei.blk, ei.len); 806 807 dec_valid_block_count(sbi, inode, ei.len); 808 f2fs_update_time(sbi, REQ_TIME); 809 810 f2fs_folio_put(ifolio, true); 811 goto out; 812 } 813 814 if (f2fs_has_inline_data(inode)) { 815 f2fs_truncate_inline_inode(inode, ifolio, from); 816 f2fs_folio_put(ifolio, true); 817 truncate_page = true; 818 goto out; 819 } 820 821 set_new_dnode(&dn, inode, ifolio, NULL, 0); 822 err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); 823 if (err) { 824 if (err == -ENOENT) 825 goto free_next; 826 goto out; 827 } 828 829 count = ADDRS_PER_PAGE(dn.node_folio, inode); 830 831 count -= dn.ofs_in_node; 832 f2fs_bug_on(sbi, count < 0); 833 834 if (dn.ofs_in_node || 
IS_INODE(dn.node_folio)) { 835 f2fs_truncate_data_blocks_range(&dn, count); 836 free_from += count; 837 } 838 839 f2fs_put_dnode(&dn); 840 free_next: 841 err = f2fs_truncate_inode_blocks(inode, free_from); 842 out: 843 if (lock) 844 f2fs_unlock_op(sbi); 845 free_partial: 846 /* lastly zero out the first data page */ 847 if (!err) 848 err = truncate_partial_data_page(inode, from, truncate_page); 849 out_err: 850 trace_f2fs_truncate_blocks_exit(inode, err); 851 return err; 852 } 853 854 int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) 855 { 856 u64 free_from = from; 857 int err; 858 859 #ifdef CONFIG_F2FS_FS_COMPRESSION 860 /* 861 * for compressed file, only support cluster size 862 * aligned truncation. 863 */ 864 if (f2fs_compressed_file(inode)) 865 free_from = round_up(from, 866 F2FS_I(inode)->i_cluster_size << PAGE_SHIFT); 867 #endif 868 869 err = f2fs_do_truncate_blocks(inode, free_from, lock); 870 if (err) 871 return err; 872 873 #ifdef CONFIG_F2FS_FS_COMPRESSION 874 /* 875 * For compressed file, after release compress blocks, don't allow write 876 * direct, but we should allow write direct after truncate to zero. 
877 */ 878 if (f2fs_compressed_file(inode) && !free_from 879 && is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) 880 clear_inode_flag(inode, FI_COMPRESS_RELEASED); 881 882 if (from != free_from) { 883 err = f2fs_truncate_partial_cluster(inode, from, lock); 884 if (err) 885 return err; 886 } 887 #endif 888 889 return 0; 890 } 891 892 int f2fs_truncate(struct inode *inode) 893 { 894 int err; 895 896 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 897 return -EIO; 898 899 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 900 S_ISLNK(inode->i_mode))) 901 return 0; 902 903 trace_f2fs_truncate(inode); 904 905 if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) 906 return -EIO; 907 908 err = f2fs_dquot_initialize(inode); 909 if (err) 910 return err; 911 912 /* we should check inline_data size */ 913 if (!f2fs_may_inline_data(inode)) { 914 err = f2fs_convert_inline_inode(inode); 915 if (err) { 916 /* 917 * Always truncate page #0 to avoid page cache 918 * leak in evict() path. 919 */ 920 truncate_inode_pages_range(inode->i_mapping, 921 F2FS_BLK_TO_BYTES(0), 922 F2FS_BLK_END_BYTES(0)); 923 return err; 924 } 925 } 926 927 err = f2fs_truncate_blocks(inode, i_size_read(inode), true); 928 if (err) 929 return err; 930 931 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 932 f2fs_mark_inode_dirty_sync(inode, false); 933 return 0; 934 } 935 936 static bool f2fs_force_buffered_io(struct inode *inode, int rw) 937 { 938 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 939 940 if (!fscrypt_dio_supported(inode)) 941 return true; 942 if (fsverity_active(inode)) 943 return true; 944 if (f2fs_compressed_file(inode)) 945 return true; 946 /* 947 * only force direct read to use buffered IO, for direct write, 948 * it expects inline data conversion before committing IO. 
949 */ 950 if (f2fs_has_inline_data(inode) && rw == READ) 951 return true; 952 953 /* disallow direct IO if any of devices has unaligned blksize */ 954 if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) 955 return true; 956 /* 957 * for blkzoned device, fallback direct IO to buffered IO, so 958 * all IOs can be serialized by log-structured write. 959 */ 960 if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) && 961 !f2fs_is_pinned_file(inode)) 962 return true; 963 if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) 964 return true; 965 966 return false; 967 } 968 969 int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path, 970 struct kstat *stat, u32 request_mask, unsigned int query_flags) 971 { 972 struct inode *inode = d_inode(path->dentry); 973 struct f2fs_inode_info *fi = F2FS_I(inode); 974 struct f2fs_inode *ri = NULL; 975 unsigned int flags; 976 977 if (f2fs_has_extra_attr(inode) && 978 f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && 979 F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { 980 stat->result_mask |= STATX_BTIME; 981 stat->btime.tv_sec = fi->i_crtime.tv_sec; 982 stat->btime.tv_nsec = fi->i_crtime.tv_nsec; 983 } 984 985 /* 986 * Return the DIO alignment restrictions if requested. We only return 987 * this information when requested, since on encrypted files it might 988 * take a fair bit of work to get if the file wasn't opened recently. 989 * 990 * f2fs sometimes supports DIO reads but not DIO writes. STATX_DIOALIGN 991 * cannot represent that, so in that case we report no DIO support. 
992 */ 993 if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) { 994 unsigned int bsize = i_blocksize(inode); 995 996 stat->result_mask |= STATX_DIOALIGN; 997 if (!f2fs_force_buffered_io(inode, WRITE)) { 998 stat->dio_mem_align = bsize; 999 stat->dio_offset_align = bsize; 1000 } 1001 } 1002 1003 flags = fi->i_flags; 1004 if (flags & F2FS_COMPR_FL) 1005 stat->attributes |= STATX_ATTR_COMPRESSED; 1006 if (flags & F2FS_APPEND_FL) 1007 stat->attributes |= STATX_ATTR_APPEND; 1008 if (IS_ENCRYPTED(inode)) 1009 stat->attributes |= STATX_ATTR_ENCRYPTED; 1010 if (flags & F2FS_IMMUTABLE_FL) 1011 stat->attributes |= STATX_ATTR_IMMUTABLE; 1012 if (flags & F2FS_NODUMP_FL) 1013 stat->attributes |= STATX_ATTR_NODUMP; 1014 if (IS_VERITY(inode)) 1015 stat->attributes |= STATX_ATTR_VERITY; 1016 1017 stat->attributes_mask |= (STATX_ATTR_COMPRESSED | 1018 STATX_ATTR_APPEND | 1019 STATX_ATTR_ENCRYPTED | 1020 STATX_ATTR_IMMUTABLE | 1021 STATX_ATTR_NODUMP | 1022 STATX_ATTR_VERITY); 1023 1024 generic_fillattr(idmap, request_mask, inode, stat); 1025 1026 /* we need to show initial sectors used for inline_data/dentries */ 1027 if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) || 1028 f2fs_has_inline_dentry(inode)) 1029 stat->blocks += (stat->size + 511) >> 9; 1030 1031 return 0; 1032 } 1033 1034 #ifdef CONFIG_F2FS_FS_POSIX_ACL 1035 static void __setattr_copy(struct mnt_idmap *idmap, 1036 struct inode *inode, const struct iattr *attr) 1037 { 1038 unsigned int ia_valid = attr->ia_valid; 1039 1040 i_uid_update(idmap, attr, inode); 1041 i_gid_update(idmap, attr, inode); 1042 if (ia_valid & ATTR_ATIME) 1043 inode_set_atime_to_ts(inode, attr->ia_atime); 1044 if (ia_valid & ATTR_MTIME) 1045 inode_set_mtime_to_ts(inode, attr->ia_mtime); 1046 if (ia_valid & ATTR_CTIME) 1047 inode_set_ctime_to_ts(inode, attr->ia_ctime); 1048 if (ia_valid & ATTR_MODE) { 1049 umode_t mode = attr->ia_mode; 1050 1051 if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) 1052 mode 
&= ~S_ISGID; 1053 set_acl_inode(inode, mode); 1054 } 1055 } 1056 #else 1057 #define __setattr_copy setattr_copy 1058 #endif 1059 1060 int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 1061 struct iattr *attr) 1062 { 1063 struct inode *inode = d_inode(dentry); 1064 struct f2fs_inode_info *fi = F2FS_I(inode); 1065 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1066 int err; 1067 1068 if (unlikely(f2fs_cp_error(sbi))) 1069 return -EIO; 1070 1071 err = setattr_prepare(idmap, dentry, attr); 1072 if (err) 1073 return err; 1074 1075 err = fscrypt_prepare_setattr(dentry, attr); 1076 if (err) 1077 return err; 1078 1079 err = fsverity_prepare_setattr(dentry, attr); 1080 if (err) 1081 return err; 1082 1083 if (unlikely(IS_IMMUTABLE(inode))) 1084 return -EPERM; 1085 1086 if (unlikely(IS_APPEND(inode) && 1087 (attr->ia_valid & (ATTR_MODE | ATTR_UID | 1088 ATTR_GID | ATTR_TIMES_SET)))) 1089 return -EPERM; 1090 1091 if ((attr->ia_valid & ATTR_SIZE)) { 1092 if (!f2fs_is_compress_backend_ready(inode) || 1093 IS_DEVICE_ALIASING(inode)) 1094 return -EOPNOTSUPP; 1095 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && 1096 !IS_ALIGNED(attr->ia_size, 1097 F2FS_BLK_TO_BYTES(fi->i_cluster_size))) 1098 return -EINVAL; 1099 /* 1100 * To prevent scattered pin block generation, we don't allow 1101 * smaller/equal size unaligned truncation for pinned file. 1102 * We only support overwrite IO to pinned file, so don't 1103 * care about larger size truncation. 
1104 */ 1105 if (f2fs_is_pinned_file(inode) && 1106 attr->ia_size <= i_size_read(inode) && 1107 !IS_ALIGNED(attr->ia_size, 1108 F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi)))) 1109 return -EINVAL; 1110 } 1111 1112 if (is_quota_modification(idmap, inode, attr)) { 1113 err = f2fs_dquot_initialize(inode); 1114 if (err) 1115 return err; 1116 } 1117 if (i_uid_needs_update(idmap, attr, inode) || 1118 i_gid_needs_update(idmap, attr, inode)) { 1119 f2fs_lock_op(sbi); 1120 err = dquot_transfer(idmap, inode, attr); 1121 if (err) { 1122 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); 1123 f2fs_unlock_op(sbi); 1124 return err; 1125 } 1126 /* 1127 * update uid/gid under lock_op(), so that dquot and inode can 1128 * be updated atomically. 1129 */ 1130 i_uid_update(idmap, attr, inode); 1131 i_gid_update(idmap, attr, inode); 1132 f2fs_mark_inode_dirty_sync(inode, true); 1133 f2fs_unlock_op(sbi); 1134 } 1135 1136 if (attr->ia_valid & ATTR_SIZE) { 1137 loff_t old_size = i_size_read(inode); 1138 1139 if (attr->ia_size > MAX_INLINE_DATA(inode)) { 1140 /* 1141 * should convert inline inode before i_size_write to 1142 * keep smaller than inline_data size with inline flag. 1143 */ 1144 err = f2fs_convert_inline_inode(inode); 1145 if (err) 1146 return err; 1147 } 1148 1149 /* 1150 * wait for inflight dio, blocks should be removed after 1151 * IO completion. 1152 */ 1153 if (attr->ia_size < old_size) 1154 inode_dio_wait(inode); 1155 1156 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 1157 filemap_invalidate_lock(inode->i_mapping); 1158 1159 if (attr->ia_size > old_size) 1160 f2fs_zero_post_eof_page(inode, attr->ia_size, false); 1161 truncate_setsize(inode, attr->ia_size); 1162 1163 if (attr->ia_size <= old_size) 1164 err = f2fs_truncate(inode); 1165 /* 1166 * do not trim all blocks after i_size if target size is 1167 * larger than i_size. 
1168 */ 1169 filemap_invalidate_unlock(inode->i_mapping); 1170 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 1171 if (err) 1172 return err; 1173 1174 spin_lock(&fi->i_size_lock); 1175 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 1176 fi->last_disk_size = i_size_read(inode); 1177 spin_unlock(&fi->i_size_lock); 1178 } 1179 1180 __setattr_copy(idmap, inode, attr); 1181 1182 if (attr->ia_valid & ATTR_MODE) { 1183 err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode)); 1184 1185 if (is_inode_flag_set(inode, FI_ACL_MODE)) { 1186 if (!err) 1187 inode->i_mode = fi->i_acl_mode; 1188 clear_inode_flag(inode, FI_ACL_MODE); 1189 } 1190 } 1191 1192 /* file size may changed here */ 1193 f2fs_mark_inode_dirty_sync(inode, true); 1194 1195 /* inode change will produce dirty node pages flushed by checkpoint */ 1196 f2fs_balance_fs(sbi, true); 1197 1198 return err; 1199 } 1200 1201 const struct inode_operations f2fs_file_inode_operations = { 1202 .getattr = f2fs_getattr, 1203 .setattr = f2fs_setattr, 1204 .get_inode_acl = f2fs_get_acl, 1205 .set_acl = f2fs_set_acl, 1206 .listxattr = f2fs_listxattr, 1207 .fiemap = f2fs_fiemap, 1208 .fileattr_get = f2fs_fileattr_get, 1209 .fileattr_set = f2fs_fileattr_set, 1210 }; 1211 1212 static int fill_zero(struct inode *inode, pgoff_t index, 1213 loff_t start, loff_t len) 1214 { 1215 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1216 struct folio *folio; 1217 1218 if (!len) 1219 return 0; 1220 1221 f2fs_balance_fs(sbi, true); 1222 1223 f2fs_lock_op(sbi); 1224 folio = f2fs_get_new_data_folio(inode, NULL, index, false); 1225 f2fs_unlock_op(sbi); 1226 1227 if (IS_ERR(folio)) 1228 return PTR_ERR(folio); 1229 1230 f2fs_folio_wait_writeback(folio, DATA, true, true); 1231 folio_zero_range(folio, start, len); 1232 folio_mark_dirty(folio); 1233 f2fs_folio_put(folio, true); 1234 return 0; 1235 } 1236 1237 int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) 1238 { 1239 int err; 1240 1241 while (pg_start < pg_end) { 
1242 struct dnode_of_data dn; 1243 pgoff_t end_offset, count; 1244 1245 set_new_dnode(&dn, inode, NULL, NULL, 0); 1246 err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); 1247 if (err) { 1248 if (err == -ENOENT) { 1249 pg_start = f2fs_get_next_page_offset(&dn, 1250 pg_start); 1251 continue; 1252 } 1253 return err; 1254 } 1255 1256 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 1257 count = min(end_offset - dn.ofs_in_node, pg_end - pg_start); 1258 1259 f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); 1260 1261 f2fs_truncate_data_blocks_range(&dn, count); 1262 f2fs_put_dnode(&dn); 1263 1264 pg_start += count; 1265 } 1266 return 0; 1267 } 1268 1269 static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len) 1270 { 1271 pgoff_t pg_start, pg_end; 1272 loff_t off_start, off_end; 1273 int ret; 1274 1275 ret = f2fs_convert_inline_inode(inode); 1276 if (ret) 1277 return ret; 1278 1279 f2fs_zero_post_eof_page(inode, offset + len, true); 1280 1281 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; 1282 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; 1283 1284 off_start = offset & (PAGE_SIZE - 1); 1285 off_end = (offset + len) & (PAGE_SIZE - 1); 1286 1287 if (pg_start == pg_end) { 1288 ret = fill_zero(inode, pg_start, off_start, 1289 off_end - off_start); 1290 if (ret) 1291 return ret; 1292 } else { 1293 if (off_start) { 1294 ret = fill_zero(inode, pg_start++, off_start, 1295 PAGE_SIZE - off_start); 1296 if (ret) 1297 return ret; 1298 } 1299 if (off_end) { 1300 ret = fill_zero(inode, pg_end, 0, off_end); 1301 if (ret) 1302 return ret; 1303 } 1304 1305 if (pg_start < pg_end) { 1306 loff_t blk_start, blk_end; 1307 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1308 1309 f2fs_balance_fs(sbi, true); 1310 1311 blk_start = (loff_t)pg_start << PAGE_SHIFT; 1312 blk_end = (loff_t)pg_end << PAGE_SHIFT; 1313 1314 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1315 filemap_invalidate_lock(inode->i_mapping); 1316 1317 
truncate_pagecache_range(inode, blk_start, blk_end - 1); 1318 1319 f2fs_lock_op(sbi); 1320 ret = f2fs_truncate_hole(inode, pg_start, pg_end); 1321 f2fs_unlock_op(sbi); 1322 1323 filemap_invalidate_unlock(inode->i_mapping); 1324 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1325 } 1326 } 1327 1328 return ret; 1329 } 1330 1331 static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, 1332 int *do_replace, pgoff_t off, pgoff_t len) 1333 { 1334 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1335 struct dnode_of_data dn; 1336 int ret, done, i; 1337 1338 next_dnode: 1339 set_new_dnode(&dn, inode, NULL, NULL, 0); 1340 ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); 1341 if (ret && ret != -ENOENT) { 1342 return ret; 1343 } else if (ret == -ENOENT) { 1344 if (dn.max_level == 0) 1345 return -ENOENT; 1346 done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - 1347 dn.ofs_in_node, len); 1348 blkaddr += done; 1349 do_replace += done; 1350 goto next; 1351 } 1352 1353 done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) - 1354 dn.ofs_in_node, len); 1355 for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { 1356 *blkaddr = f2fs_data_blkaddr(&dn); 1357 1358 if (__is_valid_data_blkaddr(*blkaddr) && 1359 !f2fs_is_valid_blkaddr(sbi, *blkaddr, 1360 DATA_GENERIC_ENHANCE)) { 1361 f2fs_put_dnode(&dn); 1362 return -EFSCORRUPTED; 1363 } 1364 1365 if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { 1366 1367 if (f2fs_lfs_mode(sbi)) { 1368 f2fs_put_dnode(&dn); 1369 return -EOPNOTSUPP; 1370 } 1371 1372 /* do not invalidate this block address */ 1373 f2fs_update_data_blkaddr(&dn, NULL_ADDR); 1374 *do_replace = 1; 1375 } 1376 } 1377 f2fs_put_dnode(&dn); 1378 next: 1379 len -= done; 1380 off += done; 1381 if (len) 1382 goto next_dnode; 1383 return 0; 1384 } 1385 1386 static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, 1387 int *do_replace, pgoff_t off, int len) 1388 { 1389 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1390 struct dnode_of_data dn; 
1391 int ret, i; 1392 1393 for (i = 0; i < len; i++, do_replace++, blkaddr++) { 1394 if (*do_replace == 0) 1395 continue; 1396 1397 set_new_dnode(&dn, inode, NULL, NULL, 0); 1398 ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); 1399 if (ret) { 1400 dec_valid_block_count(sbi, inode, 1); 1401 f2fs_invalidate_blocks(sbi, *blkaddr, 1); 1402 } else { 1403 f2fs_update_data_blkaddr(&dn, *blkaddr); 1404 } 1405 f2fs_put_dnode(&dn); 1406 } 1407 return 0; 1408 } 1409 1410 static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, 1411 block_t *blkaddr, int *do_replace, 1412 pgoff_t src, pgoff_t dst, pgoff_t len, bool full) 1413 { 1414 struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode); 1415 pgoff_t i = 0; 1416 int ret; 1417 1418 while (i < len) { 1419 if (blkaddr[i] == NULL_ADDR && !full) { 1420 i++; 1421 continue; 1422 } 1423 1424 if (do_replace[i] || blkaddr[i] == NULL_ADDR) { 1425 struct dnode_of_data dn; 1426 struct node_info ni; 1427 size_t new_size; 1428 pgoff_t ilen; 1429 1430 set_new_dnode(&dn, dst_inode, NULL, NULL, 0); 1431 ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE); 1432 if (ret) 1433 return ret; 1434 1435 ret = f2fs_get_node_info(sbi, dn.nid, &ni, false); 1436 if (ret) { 1437 f2fs_put_dnode(&dn); 1438 return ret; 1439 } 1440 1441 ilen = min((pgoff_t) 1442 ADDRS_PER_PAGE(dn.node_folio, dst_inode) - 1443 dn.ofs_in_node, len - i); 1444 do { 1445 dn.data_blkaddr = f2fs_data_blkaddr(&dn); 1446 f2fs_truncate_data_blocks_range(&dn, 1); 1447 1448 if (do_replace[i]) { 1449 f2fs_i_blocks_write(src_inode, 1450 1, false, false); 1451 f2fs_i_blocks_write(dst_inode, 1452 1, true, false); 1453 f2fs_replace_block(sbi, &dn, dn.data_blkaddr, 1454 blkaddr[i], ni.version, true, false); 1455 1456 do_replace[i] = 0; 1457 } 1458 dn.ofs_in_node++; 1459 i++; 1460 new_size = (loff_t)(dst + i) << PAGE_SHIFT; 1461 if (dst_inode->i_size < new_size) 1462 f2fs_i_size_write(dst_inode, new_size); 1463 } while (--ilen && (do_replace[i] || blkaddr[i] == 
NULL_ADDR)); 1464 1465 f2fs_put_dnode(&dn); 1466 } else { 1467 struct folio *fsrc, *fdst; 1468 1469 fsrc = f2fs_get_lock_data_folio(src_inode, 1470 src + i, true); 1471 if (IS_ERR(fsrc)) 1472 return PTR_ERR(fsrc); 1473 fdst = f2fs_get_new_data_folio(dst_inode, NULL, dst + i, 1474 true); 1475 if (IS_ERR(fdst)) { 1476 f2fs_folio_put(fsrc, true); 1477 return PTR_ERR(fdst); 1478 } 1479 1480 f2fs_folio_wait_writeback(fdst, DATA, true, true); 1481 1482 memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE); 1483 folio_mark_dirty(fdst); 1484 folio_set_f2fs_gcing(fdst); 1485 f2fs_folio_put(fdst, true); 1486 f2fs_folio_put(fsrc, true); 1487 1488 ret = f2fs_truncate_hole(src_inode, 1489 src + i, src + i + 1); 1490 if (ret) 1491 return ret; 1492 i++; 1493 } 1494 } 1495 return 0; 1496 } 1497 1498 static int __exchange_data_block(struct inode *src_inode, 1499 struct inode *dst_inode, pgoff_t src, pgoff_t dst, 1500 pgoff_t len, bool full) 1501 { 1502 block_t *src_blkaddr; 1503 int *do_replace; 1504 pgoff_t olen; 1505 int ret; 1506 1507 while (len) { 1508 olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len); 1509 1510 src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), 1511 array_size(olen, sizeof(block_t)), 1512 GFP_NOFS); 1513 if (!src_blkaddr) 1514 return -ENOMEM; 1515 1516 do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), 1517 array_size(olen, sizeof(int)), 1518 GFP_NOFS); 1519 if (!do_replace) { 1520 kvfree(src_blkaddr); 1521 return -ENOMEM; 1522 } 1523 1524 ret = __read_out_blkaddrs(src_inode, src_blkaddr, 1525 do_replace, src, olen); 1526 if (ret) 1527 goto roll_back; 1528 1529 ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr, 1530 do_replace, src, dst, olen, full); 1531 if (ret) 1532 goto roll_back; 1533 1534 src += olen; 1535 dst += olen; 1536 len -= olen; 1537 1538 kvfree(src_blkaddr); 1539 kvfree(do_replace); 1540 } 1541 return 0; 1542 1543 roll_back: 1544 __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen); 1545 kvfree(src_blkaddr); 1546 
kvfree(do_replace); 1547 return ret; 1548 } 1549 1550 static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) 1551 { 1552 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1553 pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 1554 pgoff_t start = offset >> PAGE_SHIFT; 1555 pgoff_t end = (offset + len) >> PAGE_SHIFT; 1556 int ret; 1557 1558 f2fs_balance_fs(sbi, true); 1559 1560 /* avoid gc operation during block exchange */ 1561 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1562 filemap_invalidate_lock(inode->i_mapping); 1563 1564 f2fs_zero_post_eof_page(inode, offset + len, false); 1565 1566 f2fs_lock_op(sbi); 1567 f2fs_drop_extent_tree(inode); 1568 truncate_pagecache(inode, offset); 1569 ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); 1570 f2fs_unlock_op(sbi); 1571 1572 filemap_invalidate_unlock(inode->i_mapping); 1573 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1574 return ret; 1575 } 1576 1577 static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) 1578 { 1579 loff_t new_size; 1580 int ret; 1581 1582 if (offset + len >= i_size_read(inode)) 1583 return -EINVAL; 1584 1585 /* collapse range should be aligned to block size of f2fs. 
*/ 1586 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) 1587 return -EINVAL; 1588 1589 ret = f2fs_convert_inline_inode(inode); 1590 if (ret) 1591 return ret; 1592 1593 /* write out all dirty pages from offset */ 1594 ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); 1595 if (ret) 1596 return ret; 1597 1598 ret = f2fs_do_collapse(inode, offset, len); 1599 if (ret) 1600 return ret; 1601 1602 /* write out all moved pages, if possible */ 1603 filemap_invalidate_lock(inode->i_mapping); 1604 filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); 1605 truncate_pagecache(inode, offset); 1606 1607 new_size = i_size_read(inode) - len; 1608 ret = f2fs_truncate_blocks(inode, new_size, true); 1609 filemap_invalidate_unlock(inode->i_mapping); 1610 if (!ret) 1611 f2fs_i_size_write(inode, new_size); 1612 return ret; 1613 } 1614 1615 static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, 1616 pgoff_t end) 1617 { 1618 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 1619 pgoff_t index = start; 1620 unsigned int ofs_in_node = dn->ofs_in_node; 1621 blkcnt_t count = 0; 1622 int ret; 1623 1624 for (; index < end; index++, dn->ofs_in_node++) { 1625 if (f2fs_data_blkaddr(dn) == NULL_ADDR) 1626 count++; 1627 } 1628 1629 dn->ofs_in_node = ofs_in_node; 1630 ret = f2fs_reserve_new_blocks(dn, count); 1631 if (ret) 1632 return ret; 1633 1634 dn->ofs_in_node = ofs_in_node; 1635 for (index = start; index < end; index++, dn->ofs_in_node++) { 1636 dn->data_blkaddr = f2fs_data_blkaddr(dn); 1637 /* 1638 * f2fs_reserve_new_blocks will not guarantee entire block 1639 * allocation. 
1640 */ 1641 if (dn->data_blkaddr == NULL_ADDR) { 1642 ret = -ENOSPC; 1643 break; 1644 } 1645 1646 if (dn->data_blkaddr == NEW_ADDR) 1647 continue; 1648 1649 if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr, 1650 DATA_GENERIC_ENHANCE)) { 1651 ret = -EFSCORRUPTED; 1652 break; 1653 } 1654 1655 f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1); 1656 f2fs_set_data_blkaddr(dn, NEW_ADDR); 1657 } 1658 1659 if (index > start) { 1660 f2fs_update_read_extent_cache_range(dn, start, 0, 1661 index - start); 1662 f2fs_update_age_extent_cache_range(dn, start, index - start); 1663 } 1664 1665 return ret; 1666 } 1667 1668 static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, 1669 int mode) 1670 { 1671 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1672 struct address_space *mapping = inode->i_mapping; 1673 pgoff_t index, pg_start, pg_end; 1674 loff_t new_size = i_size_read(inode); 1675 loff_t off_start, off_end; 1676 int ret = 0; 1677 1678 ret = inode_newsize_ok(inode, (len + offset)); 1679 if (ret) 1680 return ret; 1681 1682 ret = f2fs_convert_inline_inode(inode); 1683 if (ret) 1684 return ret; 1685 1686 ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); 1687 if (ret) 1688 return ret; 1689 1690 f2fs_zero_post_eof_page(inode, offset + len, true); 1691 1692 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; 1693 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; 1694 1695 off_start = offset & (PAGE_SIZE - 1); 1696 off_end = (offset + len) & (PAGE_SIZE - 1); 1697 1698 if (pg_start == pg_end) { 1699 ret = fill_zero(inode, pg_start, off_start, 1700 off_end - off_start); 1701 if (ret) 1702 return ret; 1703 1704 new_size = max_t(loff_t, new_size, offset + len); 1705 } else { 1706 if (off_start) { 1707 ret = fill_zero(inode, pg_start++, off_start, 1708 PAGE_SIZE - off_start); 1709 if (ret) 1710 return ret; 1711 1712 new_size = max_t(loff_t, new_size, 1713 (loff_t)pg_start << PAGE_SHIFT); 1714 } 1715 1716 for (index = pg_start; index < 
pg_end;) { 1717 struct dnode_of_data dn; 1718 unsigned int end_offset; 1719 pgoff_t end; 1720 1721 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1722 filemap_invalidate_lock(mapping); 1723 1724 truncate_pagecache_range(inode, 1725 (loff_t)index << PAGE_SHIFT, 1726 ((loff_t)pg_end << PAGE_SHIFT) - 1); 1727 1728 f2fs_lock_op(sbi); 1729 1730 set_new_dnode(&dn, inode, NULL, NULL, 0); 1731 ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); 1732 if (ret) { 1733 f2fs_unlock_op(sbi); 1734 filemap_invalidate_unlock(mapping); 1735 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1736 goto out; 1737 } 1738 1739 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 1740 end = min(pg_end, end_offset - dn.ofs_in_node + index); 1741 1742 ret = f2fs_do_zero_range(&dn, index, end); 1743 f2fs_put_dnode(&dn); 1744 1745 f2fs_unlock_op(sbi); 1746 filemap_invalidate_unlock(mapping); 1747 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1748 1749 f2fs_balance_fs(sbi, dn.node_changed); 1750 1751 if (ret) 1752 goto out; 1753 1754 index = end; 1755 new_size = max_t(loff_t, new_size, 1756 (loff_t)index << PAGE_SHIFT); 1757 } 1758 1759 if (off_end) { 1760 ret = fill_zero(inode, pg_end, 0, off_end); 1761 if (ret) 1762 goto out; 1763 1764 new_size = max_t(loff_t, new_size, offset + len); 1765 } 1766 } 1767 1768 out: 1769 if (new_size > i_size_read(inode)) { 1770 if (mode & FALLOC_FL_KEEP_SIZE) 1771 file_set_keep_isize(inode); 1772 else 1773 f2fs_i_size_write(inode, new_size); 1774 } 1775 return ret; 1776 } 1777 1778 static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) 1779 { 1780 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1781 struct address_space *mapping = inode->i_mapping; 1782 pgoff_t nr, pg_start, pg_end, delta, idx; 1783 loff_t new_size; 1784 int ret = 0; 1785 1786 new_size = i_size_read(inode) + len; 1787 ret = inode_newsize_ok(inode, new_size); 1788 if (ret) 1789 return ret; 1790 1791 if (offset >= i_size_read(inode)) 1792 return -EINVAL; 1793 1794 /* 
insert range should be aligned to block size of f2fs. */ 1795 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) 1796 return -EINVAL; 1797 1798 ret = f2fs_convert_inline_inode(inode); 1799 if (ret) 1800 return ret; 1801 1802 f2fs_balance_fs(sbi, true); 1803 1804 filemap_invalidate_lock(mapping); 1805 ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); 1806 filemap_invalidate_unlock(mapping); 1807 if (ret) 1808 return ret; 1809 1810 /* write out all dirty pages from offset */ 1811 ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); 1812 if (ret) 1813 return ret; 1814 1815 pg_start = offset >> PAGE_SHIFT; 1816 pg_end = (offset + len) >> PAGE_SHIFT; 1817 delta = pg_end - pg_start; 1818 idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 1819 1820 /* avoid gc operation during block exchange */ 1821 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1822 filemap_invalidate_lock(mapping); 1823 1824 f2fs_zero_post_eof_page(inode, offset + len, false); 1825 truncate_pagecache(inode, offset); 1826 1827 while (!ret && idx > pg_start) { 1828 nr = idx - pg_start; 1829 if (nr > delta) 1830 nr = delta; 1831 idx -= nr; 1832 1833 f2fs_lock_op(sbi); 1834 f2fs_drop_extent_tree(inode); 1835 1836 ret = __exchange_data_block(inode, inode, idx, 1837 idx + delta, nr, false); 1838 f2fs_unlock_op(sbi); 1839 } 1840 filemap_invalidate_unlock(mapping); 1841 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1842 if (ret) 1843 return ret; 1844 1845 /* write out all moved pages, if possible */ 1846 filemap_invalidate_lock(mapping); 1847 ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); 1848 truncate_pagecache(inode, offset); 1849 filemap_invalidate_unlock(mapping); 1850 1851 if (!ret) 1852 f2fs_i_size_write(inode, new_size); 1853 return ret; 1854 } 1855 1856 static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, 1857 loff_t len, int mode) 1858 { 1859 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1860 struct f2fs_map_blocks map = { 
.m_next_pgofs = NULL, 1861 .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, 1862 .m_may_create = true }; 1863 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, 1864 .init_gc_type = FG_GC, 1865 .should_migrate_blocks = false, 1866 .err_gc_skipped = true, 1867 .nr_free_secs = 0 }; 1868 pgoff_t pg_start, pg_end; 1869 loff_t new_size; 1870 loff_t off_end; 1871 block_t expanded = 0; 1872 int err; 1873 1874 err = inode_newsize_ok(inode, (len + offset)); 1875 if (err) 1876 return err; 1877 1878 err = f2fs_convert_inline_inode(inode); 1879 if (err) 1880 return err; 1881 1882 f2fs_zero_post_eof_page(inode, offset + len, true); 1883 1884 f2fs_balance_fs(sbi, true); 1885 1886 pg_start = ((unsigned long long)offset) >> PAGE_SHIFT; 1887 pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT; 1888 off_end = (offset + len) & (PAGE_SIZE - 1); 1889 1890 map.m_lblk = pg_start; 1891 map.m_len = pg_end - pg_start; 1892 if (off_end) 1893 map.m_len++; 1894 1895 if (!map.m_len) 1896 return 0; 1897 1898 if (f2fs_is_pinned_file(inode)) { 1899 block_t sec_blks = CAP_BLKS_PER_SEC(sbi); 1900 block_t sec_len = roundup(map.m_len, sec_blks); 1901 1902 map.m_len = sec_blks; 1903 next_alloc: 1904 f2fs_down_write(&sbi->pin_sem); 1905 1906 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 1907 if (has_not_enough_free_secs(sbi, 0, 0)) { 1908 f2fs_up_write(&sbi->pin_sem); 1909 err = -ENOSPC; 1910 f2fs_warn_ratelimited(sbi, 1911 "ino:%lu, start:%lu, end:%lu, need to trigger GC to " 1912 "reclaim enough free segment when checkpoint is enabled", 1913 inode->i_ino, pg_start, pg_end); 1914 goto out_err; 1915 } 1916 } 1917 1918 if (has_not_enough_free_secs(sbi, 0, 1919 sbi->reserved_pin_section)) { 1920 f2fs_down_write(&sbi->gc_lock); 1921 stat_inc_gc_call_count(sbi, FOREGROUND); 1922 err = f2fs_gc(sbi, &gc_control); 1923 if (err && err != -ENODATA) { 1924 f2fs_up_write(&sbi->pin_sem); 1925 goto out_err; 1926 } 1927 } 1928 1929 err = f2fs_allocate_pinning_section(sbi); 1930 if 
(err) { 1931 f2fs_up_write(&sbi->pin_sem); 1932 goto out_err; 1933 } 1934 1935 map.m_seg_type = CURSEG_COLD_DATA_PINNED; 1936 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); 1937 file_dont_truncate(inode); 1938 1939 f2fs_up_write(&sbi->pin_sem); 1940 1941 expanded += map.m_len; 1942 sec_len -= map.m_len; 1943 map.m_lblk += map.m_len; 1944 if (!err && sec_len) 1945 goto next_alloc; 1946 1947 map.m_len = expanded; 1948 } else { 1949 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO); 1950 expanded = map.m_len; 1951 } 1952 out_err: 1953 if (err) { 1954 pgoff_t last_off; 1955 1956 if (!expanded) 1957 return err; 1958 1959 last_off = pg_start + expanded - 1; 1960 1961 /* update new size to the failed position */ 1962 new_size = (last_off == pg_end) ? offset + len : 1963 (loff_t)(last_off + 1) << PAGE_SHIFT; 1964 } else { 1965 new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; 1966 } 1967 1968 if (new_size > i_size_read(inode)) { 1969 if (mode & FALLOC_FL_KEEP_SIZE) 1970 file_set_keep_isize(inode); 1971 else 1972 f2fs_i_size_write(inode, new_size); 1973 } 1974 1975 return err; 1976 } 1977 1978 static long f2fs_fallocate(struct file *file, int mode, 1979 loff_t offset, loff_t len) 1980 { 1981 struct inode *inode = file_inode(file); 1982 long ret = 0; 1983 1984 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 1985 return -EIO; 1986 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) 1987 return -ENOSPC; 1988 if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) 1989 return -EOPNOTSUPP; 1990 1991 /* f2fs only support ->fallocate for regular file */ 1992 if (!S_ISREG(inode->i_mode)) 1993 return -EINVAL; 1994 1995 if (IS_ENCRYPTED(inode) && 1996 (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) 1997 return -EOPNOTSUPP; 1998 1999 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 2000 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | 2001 FALLOC_FL_INSERT_RANGE)) 2002 return -EOPNOTSUPP; 2003 2004 inode_lock(inode); 2005 
2006 /* 2007 * Pinned file should not support partial truncation since the block 2008 * can be used by applications. 2009 */ 2010 if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && 2011 (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | 2012 FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) { 2013 ret = -EOPNOTSUPP; 2014 goto out; 2015 } 2016 2017 ret = file_modified(file); 2018 if (ret) 2019 goto out; 2020 2021 /* 2022 * wait for inflight dio, blocks should be removed after IO 2023 * completion. 2024 */ 2025 inode_dio_wait(inode); 2026 2027 if (mode & FALLOC_FL_PUNCH_HOLE) { 2028 if (offset >= inode->i_size) 2029 goto out; 2030 2031 ret = f2fs_punch_hole(inode, offset, len); 2032 } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { 2033 ret = f2fs_collapse_range(inode, offset, len); 2034 } else if (mode & FALLOC_FL_ZERO_RANGE) { 2035 ret = f2fs_zero_range(inode, offset, len, mode); 2036 } else if (mode & FALLOC_FL_INSERT_RANGE) { 2037 ret = f2fs_insert_range(inode, offset, len); 2038 } else { 2039 ret = f2fs_expand_inode_data(inode, offset, len, mode); 2040 } 2041 2042 if (!ret) { 2043 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 2044 f2fs_mark_inode_dirty_sync(inode, false); 2045 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2046 } 2047 2048 out: 2049 inode_unlock(inode); 2050 2051 trace_f2fs_fallocate(inode, mode, offset, len, ret); 2052 return ret; 2053 } 2054 2055 static int f2fs_release_file(struct inode *inode, struct file *filp) 2056 { 2057 if (atomic_dec_and_test(&F2FS_I(inode)->open_count)) 2058 f2fs_remove_donate_inode(inode); 2059 2060 /* 2061 * f2fs_release_file is called at every close calls. So we should 2062 * not drop any inmemory pages by close called by other process. 
2063 */ 2064 if (!(filp->f_mode & FMODE_WRITE) || 2065 atomic_read(&inode->i_writecount) != 1) 2066 return 0; 2067 2068 inode_lock(inode); 2069 f2fs_abort_atomic_write(inode, true); 2070 inode_unlock(inode); 2071 2072 return 0; 2073 } 2074 2075 static int f2fs_file_flush(struct file *file, fl_owner_t id) 2076 { 2077 struct inode *inode = file_inode(file); 2078 2079 /* 2080 * If the process doing a transaction is crashed, we should do 2081 * roll-back. Otherwise, other reader/write can see corrupted database 2082 * until all the writers close its file. Since this should be done 2083 * before dropping file lock, it needs to do in ->flush. 2084 */ 2085 if (F2FS_I(inode)->atomic_write_task == current && 2086 (current->flags & PF_EXITING)) { 2087 inode_lock(inode); 2088 f2fs_abort_atomic_write(inode, true); 2089 inode_unlock(inode); 2090 } 2091 2092 return 0; 2093 } 2094 2095 static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) 2096 { 2097 struct f2fs_inode_info *fi = F2FS_I(inode); 2098 u32 masked_flags = fi->i_flags & mask; 2099 2100 /* mask can be shrunk by flags_valid selector */ 2101 iflags &= mask; 2102 2103 /* Is it quota file? 
Do not allow user to mess with it */ 2104 if (IS_NOQUOTA(inode)) 2105 return -EPERM; 2106 2107 if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) { 2108 if (!f2fs_sb_has_casefold(F2FS_I_SB(inode))) 2109 return -EOPNOTSUPP; 2110 if (!f2fs_empty_dir(inode)) 2111 return -ENOTEMPTY; 2112 } 2113 2114 if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) { 2115 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 2116 return -EOPNOTSUPP; 2117 if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL)) 2118 return -EINVAL; 2119 } 2120 2121 if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { 2122 if (masked_flags & F2FS_COMPR_FL) { 2123 if (!f2fs_disable_compressed_file(inode)) 2124 return -EINVAL; 2125 } else { 2126 /* try to convert inline_data to support compression */ 2127 int err = f2fs_convert_inline_inode(inode); 2128 if (err) 2129 return err; 2130 2131 f2fs_down_write(&fi->i_sem); 2132 if (!f2fs_may_compress(inode) || 2133 atomic_read(&fi->writeback) || 2134 (S_ISREG(inode->i_mode) && 2135 F2FS_HAS_BLOCKS(inode))) { 2136 f2fs_up_write(&fi->i_sem); 2137 return -EINVAL; 2138 } 2139 err = set_compress_context(inode); 2140 f2fs_up_write(&fi->i_sem); 2141 2142 if (err) 2143 return err; 2144 } 2145 } 2146 2147 fi->i_flags = iflags | (fi->i_flags & ~mask); 2148 f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) && 2149 (fi->i_flags & F2FS_NOCOMP_FL)); 2150 2151 if (fi->i_flags & F2FS_PROJINHERIT_FL) 2152 set_inode_flag(inode, FI_PROJ_INHERIT); 2153 else 2154 clear_inode_flag(inode, FI_PROJ_INHERIT); 2155 2156 inode_set_ctime_current(inode); 2157 f2fs_set_inode_flags(inode); 2158 f2fs_mark_inode_dirty_sync(inode, true); 2159 return 0; 2160 } 2161 2162 /* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */ 2163 2164 /* 2165 * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry 2166 * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to 2167 * F2FS_GETTABLE_FS_FL. 
To also make it settable via FS_IOC_SETFLAGS, also add 2168 * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL. 2169 * 2170 * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and 2171 * FS_IOC_FSSETXATTR is done by the VFS. 2172 */ 2173 2174 static const struct { 2175 u32 iflag; 2176 u32 fsflag; 2177 } f2fs_fsflags_map[] = { 2178 { F2FS_COMPR_FL, FS_COMPR_FL }, 2179 { F2FS_SYNC_FL, FS_SYNC_FL }, 2180 { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, 2181 { F2FS_APPEND_FL, FS_APPEND_FL }, 2182 { F2FS_NODUMP_FL, FS_NODUMP_FL }, 2183 { F2FS_NOATIME_FL, FS_NOATIME_FL }, 2184 { F2FS_NOCOMP_FL, FS_NOCOMP_FL }, 2185 { F2FS_INDEX_FL, FS_INDEX_FL }, 2186 { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, 2187 { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, 2188 { F2FS_CASEFOLD_FL, FS_CASEFOLD_FL }, 2189 }; 2190 2191 #define F2FS_GETTABLE_FS_FL ( \ 2192 FS_COMPR_FL | \ 2193 FS_SYNC_FL | \ 2194 FS_IMMUTABLE_FL | \ 2195 FS_APPEND_FL | \ 2196 FS_NODUMP_FL | \ 2197 FS_NOATIME_FL | \ 2198 FS_NOCOMP_FL | \ 2199 FS_INDEX_FL | \ 2200 FS_DIRSYNC_FL | \ 2201 FS_PROJINHERIT_FL | \ 2202 FS_ENCRYPT_FL | \ 2203 FS_INLINE_DATA_FL | \ 2204 FS_NOCOW_FL | \ 2205 FS_VERITY_FL | \ 2206 FS_CASEFOLD_FL) 2207 2208 #define F2FS_SETTABLE_FS_FL ( \ 2209 FS_COMPR_FL | \ 2210 FS_SYNC_FL | \ 2211 FS_IMMUTABLE_FL | \ 2212 FS_APPEND_FL | \ 2213 FS_NODUMP_FL | \ 2214 FS_NOATIME_FL | \ 2215 FS_NOCOMP_FL | \ 2216 FS_DIRSYNC_FL | \ 2217 FS_PROJINHERIT_FL | \ 2218 FS_CASEFOLD_FL) 2219 2220 /* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */ 2221 static inline u32 f2fs_iflags_to_fsflags(u32 iflags) 2222 { 2223 u32 fsflags = 0; 2224 int i; 2225 2226 for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) 2227 if (iflags & f2fs_fsflags_map[i].iflag) 2228 fsflags |= f2fs_fsflags_map[i].fsflag; 2229 2230 return fsflags; 2231 } 2232 2233 /* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */ 2234 static inline u32 f2fs_fsflags_to_iflags(u32 fsflags) 2235 { 2236 u32 iflags = 0; 2237 int i; 2238 2239 for (i = 0; i < 
ARRAY_SIZE(f2fs_fsflags_map); i++) 2240 if (fsflags & f2fs_fsflags_map[i].fsflag) 2241 iflags |= f2fs_fsflags_map[i].iflag; 2242 2243 return iflags; 2244 } 2245 2246 static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) 2247 { 2248 struct inode *inode = file_inode(filp); 2249 2250 return put_user(inode->i_generation, (int __user *)arg); 2251 } 2252 2253 static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) 2254 { 2255 struct inode *inode = file_inode(filp); 2256 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2257 struct f2fs_inode_info *fi = F2FS_I(inode); 2258 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2259 loff_t isize; 2260 int ret; 2261 2262 if (!(filp->f_mode & FMODE_WRITE)) 2263 return -EBADF; 2264 2265 if (!inode_owner_or_capable(idmap, inode)) 2266 return -EACCES; 2267 2268 if (!S_ISREG(inode->i_mode)) 2269 return -EINVAL; 2270 2271 if (filp->f_flags & O_DIRECT) 2272 return -EINVAL; 2273 2274 ret = mnt_want_write_file(filp); 2275 if (ret) 2276 return ret; 2277 2278 inode_lock(inode); 2279 2280 if (!f2fs_disable_compressed_file(inode) || 2281 f2fs_is_pinned_file(inode)) { 2282 ret = -EINVAL; 2283 goto out; 2284 } 2285 2286 if (f2fs_is_atomic_file(inode)) 2287 goto out; 2288 2289 ret = f2fs_convert_inline_inode(inode); 2290 if (ret) 2291 goto out; 2292 2293 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 2294 f2fs_down_write(&fi->i_gc_rwsem[READ]); 2295 2296 /* 2297 * Should wait end_io to count F2FS_WB_CP_DATA correctly by 2298 * f2fs_is_atomic_file. 
2299 */ 2300 if (get_dirty_pages(inode)) 2301 f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u", 2302 inode->i_ino, get_dirty_pages(inode)); 2303 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 2304 if (ret) 2305 goto out_unlock; 2306 2307 /* Check if the inode already has a COW inode */ 2308 if (fi->cow_inode == NULL) { 2309 /* Create a COW inode for atomic write */ 2310 struct dentry *dentry = file_dentry(filp); 2311 struct inode *dir = d_inode(dentry->d_parent); 2312 2313 ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode); 2314 if (ret) 2315 goto out_unlock; 2316 2317 set_inode_flag(fi->cow_inode, FI_COW_FILE); 2318 clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); 2319 2320 /* Set the COW inode's atomic_inode to the atomic inode */ 2321 F2FS_I(fi->cow_inode)->atomic_inode = inode; 2322 } else { 2323 /* Reuse the already created COW inode */ 2324 f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode)); 2325 2326 invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1); 2327 2328 ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); 2329 if (ret) 2330 goto out_unlock; 2331 } 2332 2333 f2fs_write_inode(inode, NULL); 2334 2335 stat_inc_atomic_inode(inode); 2336 2337 set_inode_flag(inode, FI_ATOMIC_FILE); 2338 2339 isize = i_size_read(inode); 2340 fi->original_i_size = isize; 2341 if (truncate) { 2342 set_inode_flag(inode, FI_ATOMIC_REPLACE); 2343 truncate_inode_pages_final(inode->i_mapping); 2344 f2fs_i_size_write(inode, 0); 2345 isize = 0; 2346 } 2347 f2fs_i_size_write(fi->cow_inode, isize); 2348 2349 out_unlock: 2350 f2fs_up_write(&fi->i_gc_rwsem[READ]); 2351 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 2352 if (ret) 2353 goto out; 2354 2355 f2fs_update_time(sbi, REQ_TIME); 2356 fi->atomic_write_task = current; 2357 stat_update_max_atomic_write(inode); 2358 fi->atomic_write_cnt = 0; 2359 out: 2360 inode_unlock(inode); 2361 mnt_drop_write_file(filp); 2362 return ret; 2363 } 2364 2365 static int f2fs_ioc_commit_atomic_write(struct file 
*filp) 2366 { 2367 struct inode *inode = file_inode(filp); 2368 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2369 int ret; 2370 2371 if (!(filp->f_mode & FMODE_WRITE)) 2372 return -EBADF; 2373 2374 if (!inode_owner_or_capable(idmap, inode)) 2375 return -EACCES; 2376 2377 ret = mnt_want_write_file(filp); 2378 if (ret) 2379 return ret; 2380 2381 f2fs_balance_fs(F2FS_I_SB(inode), true); 2382 2383 inode_lock(inode); 2384 2385 if (f2fs_is_atomic_file(inode)) { 2386 ret = f2fs_commit_atomic_write(inode); 2387 if (!ret) 2388 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); 2389 2390 f2fs_abort_atomic_write(inode, ret); 2391 } else { 2392 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); 2393 } 2394 2395 inode_unlock(inode); 2396 mnt_drop_write_file(filp); 2397 return ret; 2398 } 2399 2400 static int f2fs_ioc_abort_atomic_write(struct file *filp) 2401 { 2402 struct inode *inode = file_inode(filp); 2403 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2404 int ret; 2405 2406 if (!(filp->f_mode & FMODE_WRITE)) 2407 return -EBADF; 2408 2409 if (!inode_owner_or_capable(idmap, inode)) 2410 return -EACCES; 2411 2412 ret = mnt_want_write_file(filp); 2413 if (ret) 2414 return ret; 2415 2416 inode_lock(inode); 2417 2418 f2fs_abort_atomic_write(inode, true); 2419 2420 inode_unlock(inode); 2421 2422 mnt_drop_write_file(filp); 2423 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2424 return ret; 2425 } 2426 2427 int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, 2428 bool readonly, bool need_lock) 2429 { 2430 struct super_block *sb = sbi->sb; 2431 int ret = 0; 2432 2433 switch (flag) { 2434 case F2FS_GOING_DOWN_FULLSYNC: 2435 ret = bdev_freeze(sb->s_bdev); 2436 if (ret) 2437 goto out; 2438 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2439 bdev_thaw(sb->s_bdev); 2440 break; 2441 case F2FS_GOING_DOWN_METASYNC: 2442 /* do checkpoint only */ 2443 ret = f2fs_sync_fs(sb, 1); 2444 if (ret) { 2445 if (ret == -EIO) 2446 ret = 0; 2447 goto out; 2448 } 2449 
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2450 break; 2451 case F2FS_GOING_DOWN_NOSYNC: 2452 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2453 break; 2454 case F2FS_GOING_DOWN_METAFLUSH: 2455 f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); 2456 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2457 break; 2458 case F2FS_GOING_DOWN_NEED_FSCK: 2459 set_sbi_flag(sbi, SBI_NEED_FSCK); 2460 set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); 2461 set_sbi_flag(sbi, SBI_IS_DIRTY); 2462 /* do checkpoint only */ 2463 ret = f2fs_sync_fs(sb, 1); 2464 if (ret == -EIO) 2465 ret = 0; 2466 goto out; 2467 default: 2468 ret = -EINVAL; 2469 goto out; 2470 } 2471 2472 if (readonly) 2473 goto out; 2474 2475 /* 2476 * grab sb->s_umount to avoid racing w/ remount() and other shutdown 2477 * paths. 2478 */ 2479 if (need_lock) 2480 down_write(&sbi->sb->s_umount); 2481 2482 f2fs_stop_gc_thread(sbi); 2483 f2fs_stop_discard_thread(sbi); 2484 2485 f2fs_drop_discard_cmd(sbi); 2486 clear_opt(sbi, DISCARD); 2487 2488 if (need_lock) 2489 up_write(&sbi->sb->s_umount); 2490 2491 f2fs_update_time(sbi, REQ_TIME); 2492 out: 2493 2494 trace_f2fs_shutdown(sbi, flag, ret); 2495 2496 return ret; 2497 } 2498 2499 static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) 2500 { 2501 struct inode *inode = file_inode(filp); 2502 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2503 __u32 in; 2504 int ret; 2505 bool need_drop = false, readonly = false; 2506 2507 if (!capable(CAP_SYS_ADMIN)) 2508 return -EPERM; 2509 2510 if (get_user(in, (__u32 __user *)arg)) 2511 return -EFAULT; 2512 2513 if (in != F2FS_GOING_DOWN_FULLSYNC) { 2514 ret = mnt_want_write_file(filp); 2515 if (ret) { 2516 if (ret != -EROFS) 2517 return ret; 2518 2519 /* fallback to nosync shutdown for readonly fs */ 2520 in = F2FS_GOING_DOWN_NOSYNC; 2521 readonly = true; 2522 } else { 2523 need_drop = true; 2524 } 2525 } 2526 2527 ret = f2fs_do_shutdown(sbi, in, readonly, true); 2528 2529 if (need_drop) 2530 
mnt_drop_write_file(filp); 2531 2532 return ret; 2533 } 2534 2535 static int f2fs_keep_noreuse_range(struct inode *inode, 2536 loff_t offset, loff_t len) 2537 { 2538 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2539 u64 max_bytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 2540 u64 start, end; 2541 int ret = 0; 2542 2543 if (!S_ISREG(inode->i_mode)) 2544 return 0; 2545 2546 if (offset >= max_bytes || len > max_bytes || 2547 (offset + len) > max_bytes) 2548 return 0; 2549 2550 start = offset >> PAGE_SHIFT; 2551 end = DIV_ROUND_UP(offset + len, PAGE_SIZE); 2552 2553 inode_lock(inode); 2554 if (f2fs_is_atomic_file(inode)) { 2555 inode_unlock(inode); 2556 return 0; 2557 } 2558 2559 spin_lock(&sbi->inode_lock[DONATE_INODE]); 2560 /* let's remove the range, if len = 0 */ 2561 if (!len) { 2562 if (!list_empty(&F2FS_I(inode)->gdonate_list)) { 2563 list_del_init(&F2FS_I(inode)->gdonate_list); 2564 sbi->donate_files--; 2565 if (is_inode_flag_set(inode, FI_DONATE_FINISHED)) 2566 ret = -EALREADY; 2567 else 2568 set_inode_flag(inode, FI_DONATE_FINISHED); 2569 } else 2570 ret = -ENOENT; 2571 } else { 2572 if (list_empty(&F2FS_I(inode)->gdonate_list)) { 2573 list_add_tail(&F2FS_I(inode)->gdonate_list, 2574 &sbi->inode_list[DONATE_INODE]); 2575 sbi->donate_files++; 2576 } else { 2577 list_move_tail(&F2FS_I(inode)->gdonate_list, 2578 &sbi->inode_list[DONATE_INODE]); 2579 } 2580 F2FS_I(inode)->donate_start = start; 2581 F2FS_I(inode)->donate_end = end - 1; 2582 clear_inode_flag(inode, FI_DONATE_FINISHED); 2583 } 2584 spin_unlock(&sbi->inode_lock[DONATE_INODE]); 2585 inode_unlock(inode); 2586 2587 return ret; 2588 } 2589 2590 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) 2591 { 2592 struct inode *inode = file_inode(filp); 2593 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2594 struct fstrim_range range; 2595 int ret; 2596 2597 if (!capable(CAP_SYS_ADMIN)) 2598 return -EPERM; 2599 2600 if (!f2fs_hw_support_discard(sbi)) 2601 return -EOPNOTSUPP; 2602 2603 if 
/*
 * Return true if any of the 16 bytes of @u is non-zero, false when the
 * whole buffer is zero.
 */
static bool uuid_is_nonzero(__u8 u[16])
{
	const __u8 *cur = u;
	const __u8 *const end = u + 16;

	while (cur < end) {
		if (*cur++)
			return true;
	}

	return false;
}
false); 2678 if (err) { 2679 /* undo new data */ 2680 memset(sbi->raw_super->encrypt_pw_salt, 0, 16); 2681 goto out_err; 2682 } 2683 got_it: 2684 memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16); 2685 out_err: 2686 f2fs_up_write(&sbi->sb_lock); 2687 mnt_drop_write_file(filp); 2688 2689 if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16)) 2690 err = -EFAULT; 2691 2692 return err; 2693 } 2694 2695 static int f2fs_ioc_get_encryption_policy_ex(struct file *filp, 2696 unsigned long arg) 2697 { 2698 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2699 return -EOPNOTSUPP; 2700 2701 return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg); 2702 } 2703 2704 static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg) 2705 { 2706 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2707 return -EOPNOTSUPP; 2708 2709 return fscrypt_ioctl_add_key(filp, (void __user *)arg); 2710 } 2711 2712 static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg) 2713 { 2714 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2715 return -EOPNOTSUPP; 2716 2717 return fscrypt_ioctl_remove_key(filp, (void __user *)arg); 2718 } 2719 2720 static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp, 2721 unsigned long arg) 2722 { 2723 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2724 return -EOPNOTSUPP; 2725 2726 return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg); 2727 } 2728 2729 static int f2fs_ioc_get_encryption_key_status(struct file *filp, 2730 unsigned long arg) 2731 { 2732 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2733 return -EOPNOTSUPP; 2734 2735 return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); 2736 } 2737 2738 static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg) 2739 { 2740 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2741 return -EOPNOTSUPP; 2742 2743 return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); 
2744 } 2745 2746 static int f2fs_ioc_gc(struct file *filp, unsigned long arg) 2747 { 2748 struct inode *inode = file_inode(filp); 2749 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2750 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, 2751 .no_bg_gc = false, 2752 .should_migrate_blocks = false, 2753 .nr_free_secs = 0 }; 2754 __u32 sync; 2755 int ret; 2756 2757 if (!capable(CAP_SYS_ADMIN)) 2758 return -EPERM; 2759 2760 if (get_user(sync, (__u32 __user *)arg)) 2761 return -EFAULT; 2762 2763 if (f2fs_readonly(sbi->sb)) 2764 return -EROFS; 2765 2766 ret = mnt_want_write_file(filp); 2767 if (ret) 2768 return ret; 2769 2770 if (!sync) { 2771 if (!f2fs_down_write_trylock(&sbi->gc_lock)) { 2772 ret = -EBUSY; 2773 goto out; 2774 } 2775 } else { 2776 f2fs_down_write(&sbi->gc_lock); 2777 } 2778 2779 gc_control.init_gc_type = sync ? FG_GC : BG_GC; 2780 gc_control.err_gc_skipped = sync; 2781 stat_inc_gc_call_count(sbi, FOREGROUND); 2782 ret = f2fs_gc(sbi, &gc_control); 2783 out: 2784 mnt_drop_write_file(filp); 2785 return ret; 2786 } 2787 2788 static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range) 2789 { 2790 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); 2791 struct f2fs_gc_control gc_control = { 2792 .init_gc_type = range->sync ? 
FG_GC : BG_GC, 2793 .no_bg_gc = false, 2794 .should_migrate_blocks = false, 2795 .err_gc_skipped = range->sync, 2796 .nr_free_secs = 0 }; 2797 u64 end; 2798 int ret; 2799 2800 if (!capable(CAP_SYS_ADMIN)) 2801 return -EPERM; 2802 if (f2fs_readonly(sbi->sb)) 2803 return -EROFS; 2804 2805 end = range->start + range->len; 2806 if (end < range->start || range->start < MAIN_BLKADDR(sbi) || 2807 end >= MAX_BLKADDR(sbi)) 2808 return -EINVAL; 2809 2810 ret = mnt_want_write_file(filp); 2811 if (ret) 2812 return ret; 2813 2814 do_more: 2815 if (!range->sync) { 2816 if (!f2fs_down_write_trylock(&sbi->gc_lock)) { 2817 ret = -EBUSY; 2818 goto out; 2819 } 2820 } else { 2821 f2fs_down_write(&sbi->gc_lock); 2822 } 2823 2824 gc_control.victim_segno = GET_SEGNO(sbi, range->start); 2825 stat_inc_gc_call_count(sbi, FOREGROUND); 2826 ret = f2fs_gc(sbi, &gc_control); 2827 if (ret) { 2828 if (ret == -EBUSY) 2829 ret = -EAGAIN; 2830 goto out; 2831 } 2832 range->start += CAP_BLKS_PER_SEC(sbi); 2833 if (range->start <= end) 2834 goto do_more; 2835 out: 2836 mnt_drop_write_file(filp); 2837 return ret; 2838 } 2839 2840 static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) 2841 { 2842 struct f2fs_gc_range range; 2843 2844 if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, 2845 sizeof(range))) 2846 return -EFAULT; 2847 return __f2fs_ioc_gc_range(filp, &range); 2848 } 2849 2850 static int f2fs_ioc_write_checkpoint(struct file *filp) 2851 { 2852 struct inode *inode = file_inode(filp); 2853 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2854 int ret; 2855 2856 if (!capable(CAP_SYS_ADMIN)) 2857 return -EPERM; 2858 2859 if (f2fs_readonly(sbi->sb)) 2860 return -EROFS; 2861 2862 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 2863 f2fs_info(sbi, "Skipping Checkpoint. 
/*
 * Defragment the byte range described by @range in the file behind @filp.
 *
 * The range is first scanned to see whether its mapped blocks are already
 * physically contiguous; if so nothing is done.  Otherwise the folios of
 * every mapped block are re-dirtied and written back, at most
 * BLKS_PER_SEG(sbi) blocks per writeback pass, while FI_OPU_WRITE is set on
 * the inode.
 *
 * On success range->len is overwritten with the number of bytes that were
 * marked for rewrite (0 when nothing had to move).  On failure range->len
 * is left untouched.
 *
 * Returns 0 on success, -EINVAL for FI_COMPRESS_RELEASED or atomic files and
 * when f2fs_should_update_inplace() is true, -EAGAIN when there are not
 * enough free sections, or an error from the mapping/writeback helpers.
 */
static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
					struct file *filp,
					struct f2fs_defragment *range)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_map_blocks map = { .m_next_extent = NULL,
					.m_seg_type = NO_CHECK_TYPE,
					.m_may_create = false };
	struct extent_info ei = {};
	pgoff_t pg_start, pg_end, next_pgofs;
	unsigned int total = 0, sec_num;
	block_t blk_end = 0;
	bool fragmented = false;
	int err;

	f2fs_balance_fs(sbi, true);

	inode_lock(inode);
	/* page window to work on; the end is clamped to the current i_size */
	pg_start = range->start >> PAGE_SHIFT;
	pg_end = min_t(pgoff_t,
				(range->start + range->len) >> PAGE_SHIFT,
				DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE));

	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) ||
		f2fs_is_atomic_file(inode)) {
		err = -EINVAL;
		/* FI_OPU_WRITE has not been set yet, so skip clearing it */
		goto unlock_out;
	}

	/* if in-place-update policy is enabled, don't waste time here */
	set_inode_flag(inode, FI_OPU_WRITE);
	if (f2fs_should_update_inplace(inode, NULL)) {
		err = -EINVAL;
		goto out;
	}

	/* writeback all dirty pages in the range */
	err = filemap_write_and_wait_range(inode->i_mapping,
						pg_start << PAGE_SHIFT,
						(pg_end << PAGE_SHIFT) - 1);
	if (err)
		goto out;

	/*
	 * lookup mapping info in extent cache, skip defragmenting if physical
	 * block addresses are continuous.
	 */
	if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
		if ((pgoff_t)ei.fofs + ei.len >= pg_end)
			goto out;
	}

	map.m_lblk = pg_start;
	map.m_next_pgofs = &next_pgofs;

	/*
	 * lookup mapping info in dnode page cache, skip defragmenting if all
	 * physical block addresses are continuous even if there are hole(s)
	 * in logical blocks.
	 */
	while (map.m_lblk < pg_end) {
		map.m_len = pg_end - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
		if (err)
			goto out;

		/* nothing mapped here: jump to the offset reported back */
		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
			map.m_lblk = next_pgofs;
			continue;
		}

		/* this extent does not start where the previous one ended */
		if (blk_end && blk_end != map.m_pblk)
			fragmented = true;

		/* record total count of block that we're going to move */
		total += map.m_len;

		blk_end = map.m_pblk + map.m_len;

		map.m_lblk += map.m_len;
	}

	if (!fragmented) {
		/* report "0 bytes moved" through range->len */
		total = 0;
		goto out;
	}

	sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi));

	/*
	 * make sure there are enough free section for LFS allocation, this can
	 * avoid defragment running in SSR mode when free section are allocated
	 * intensively
	 */
	if (has_not_enough_free_secs(sbi, 0, sec_num)) {
		err = -EAGAIN;
		goto out;
	}

	/* second pass: restart from the beginning and count what is moved */
	map.m_lblk = pg_start;
	map.m_len = pg_end - pg_start;
	total = 0;

	while (map.m_lblk < pg_end) {
		pgoff_t idx;
		/* blocks dirtied in the current batch */
		int cnt = 0;

do_map:
		map.m_len = pg_end - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
		if (err)
			goto clear_out;

		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
			map.m_lblk = next_pgofs;
			goto check;
		}

		/*
		 * NOTE(review): FI_SKIP_WRITES presumably holds off writeback
		 * while the batch is being dirtied - confirm against the
		 * writepage path.
		 */
		set_inode_flag(inode, FI_SKIP_WRITES);

		idx = map.m_lblk;
		while (idx < map.m_lblk + map.m_len &&
						cnt < BLKS_PER_SEG(sbi)) {
			struct folio *folio;

			folio = f2fs_get_lock_data_folio(inode, idx, true);
			if (IS_ERR(folio)) {
				err = PTR_ERR(folio);
				goto clear_out;
			}

			f2fs_folio_wait_writeback(folio, DATA, true, true);

			/* re-dirty the folio so the next writeback rewrites it */
			folio_mark_dirty(folio);
			folio_set_f2fs_gcing(folio);
			f2fs_folio_put(folio, true);

			idx++;
			cnt++;
			total++;
		}

		map.m_lblk = idx;
check:
		/* keep filling the batch until the range or the batch is full */
		if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi))
			goto do_map;

		clear_inode_flag(inode, FI_SKIP_WRITES);

		/* start writeback of the batch that was just dirtied */
		err = filemap_fdatawrite(inode->i_mapping);
		if (err)
			goto out;
	}
clear_out:
	clear_inode_flag(inode, FI_SKIP_WRITES);
out:
	clear_inode_flag(inode, FI_OPU_WRITE);
unlock_out:
	inode_unlock(inode);
	if (!err)
		range->len = (u64)total << PAGE_SHIFT;
	return err;
}

/*
 * F2FS_IOC_DEFRAGMENT handler.
 *
 * Copies a struct f2fs_defragment from user space, validates it and calls
 * f2fs_defragment_range(); on success the structure (with the updated len)
 * is copied back.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, -EINVAL for non-regular files,
 * for start/len not aligned to F2FS_BLKSIZE and for ranges ending beyond
 * max_file_blocks(), -EROFS on a read-only filesystem, -EFAULT when the
 * argument cannot be copied in or out, or the error from
 * mnt_want_write_file() / f2fs_defragment_range().
 */
static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_defragment range;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (f2fs_readonly(sbi->sb))
		return -EROFS;

	if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
							sizeof(range)))
		return -EFAULT;

	/* verify alignment of offset & size */
	if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1))
		return -EINVAL;

	if (unlikely((range.start + range.len) >> PAGE_SHIFT >
					max_file_blocks(inode)))
		return -EINVAL;

	err = mnt_want_write_file(filp);
	if (err)
		return err;

	err = f2fs_defragment_range(sbi, filp, &range);
	mnt_drop_write_file(filp);

	/*
	 * range.len is only rewritten on success; on failure this tests the
	 * length supplied by the caller.
	 */
	if (range.len)
		f2fs_update_time(sbi, REQ_TIME);
	if (err < 0)
		return err;

	if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
							sizeof(range)))
		return -EFAULT;

	return 0;
}
/*
 * Move @len bytes of data blocks from @file_in at @pos_in to @file_out at
 * @pos_out by exchanging block mappings (__exchange_data_block()).
 *
 * A @len of 0 means "from @pos_in to the end of the source file".  @pos_in,
 * @pos_out and the end of the source range must be F2FS_BLKSIZE aligned; a
 * range that ends exactly at the source i_size is rounded up to the next
 * block boundary first.
 *
 * Returns 0 on success (including the no-op cases), -EXDEV when the files
 * are on different mounts or superblocks, -EROFS on a read-only filesystem,
 * -EINVAL for non-regular files, negative offsets, an overlapping forward
 * move within one file, atomic files, out-of-range or misaligned requests,
 * -EOPNOTSUPP for encrypted, compressed or pinned files, -EBUSY when the
 * destination inode lock or its i_gc_rwsem[WRITE] cannot be taken without
 * blocking, or an error from the helpers called.
 */
static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
			struct file *file_out, loff_t pos_out, size_t len)
{
	struct inode *src = file_inode(file_in);
	struct inode *dst = file_inode(file_out);
	struct f2fs_sb_info *sbi = F2FS_I_SB(src);
	/* olen keeps the caller-visible length; len may be rounded up below */
	size_t olen = len, dst_max_i_size = 0;
	size_t dst_osize;
	int ret;

	if (file_in->f_path.mnt != file_out->f_path.mnt ||
				src->i_sb != dst->i_sb)
		return -EXDEV;

	if (unlikely(f2fs_readonly(src->i_sb)))
		return -EROFS;

	if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
		return -EINVAL;

	if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst))
		return -EOPNOTSUPP;

	if (pos_out < 0 || pos_in < 0)
		return -EINVAL;

	if (src == dst) {
		/* moving a range onto itself is a no-op */
		if (pos_in == pos_out)
			return 0;
		/* destination starts inside the source range */
		if (pos_out > pos_in && pos_out < pos_in + len)
			return -EINVAL;
	}

	inode_lock(src);
	if (src != dst) {
		/* only try-lock the second inode; report -EBUSY on contention */
		ret = -EBUSY;
		if (!inode_trylock(dst))
			goto out;
	}

	if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) ||
		f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* default result for the range and alignment checks that follow */
	ret = -EINVAL;
	if (pos_in + len > src->i_size || pos_in + len < pos_in)
		goto out_unlock;
	if (len == 0)
		olen = len = src->i_size - pos_in;
	/* a range reaching EOF is extended to the end of the last block */
	if (pos_in + len == src->i_size)
		len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in;
	if (len == 0) {
		ret = 0;
		goto out_unlock;
	}

	/* remember the size to restore, or the new size if dst grows */
	dst_osize = dst->i_size;
	if (pos_out + olen > dst->i_size)
		dst_max_i_size = pos_out + olen;

	/* verify the end result is block aligned */
	if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) ||
			!IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) ||
			!IS_ALIGNED(pos_out, F2FS_BLKSIZE))
		goto out_unlock;

	ret = f2fs_convert_inline_inode(src);
	if (ret)
		goto out_unlock;

	ret = f2fs_convert_inline_inode(dst);
	if (ret)
		goto out_unlock;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(src->i_mapping,
					pos_in, pos_in + len);
	if (ret)
		goto out_unlock;

	ret = filemap_write_and_wait_range(dst->i_mapping,
					pos_out, pos_out + len);
	if (ret)
		goto out_unlock;

	f2fs_balance_fs(sbi, true);

	f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	if (src != dst) {
		/* as with the inode lock, only try-lock the second rwsem */
		ret = -EBUSY;
		if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
			goto out_src;
	}

	f2fs_lock_op(sbi);
	ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in),
				F2FS_BYTES_TO_BLK(pos_out),
				F2FS_BYTES_TO_BLK(len), false);

	if (!ret) {
		/* grow dst if needed, otherwise undo any i_size change */
		if (dst_max_i_size)
			f2fs_i_size_write(dst, dst_max_i_size);
		else if (dst_osize != dst->i_size)
			f2fs_i_size_write(dst, dst_osize);
	}
	f2fs_unlock_op(sbi);

	if (src != dst)
		f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
out_src:
	f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	if (ret)
		goto out_unlock;

	/* success: refresh mtime/ctime on both inodes */
	inode_set_mtime_to_ts(src, inode_set_ctime_current(src));
	f2fs_mark_inode_dirty_sync(src, false);
	if (src != dst) {
		inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst));
		f2fs_mark_inode_dirty_sync(dst, false);
	}
	f2fs_update_time(sbi, REQ_TIME);

out_unlock:
	if (src != dst)
		inode_unlock(dst);
out:
	inode_unlock(src);
	return ret;
}
(!(fd_file(dst)->f_mode & FMODE_WRITE)) 3239 return -EBADF; 3240 3241 err = mnt_want_write_file(filp); 3242 if (err) 3243 return err; 3244 3245 err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst), 3246 range->pos_out, range->len); 3247 3248 mnt_drop_write_file(filp); 3249 return err; 3250 } 3251 3252 static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) 3253 { 3254 struct f2fs_move_range range; 3255 3256 if (copy_from_user(&range, (struct f2fs_move_range __user *)arg, 3257 sizeof(range))) 3258 return -EFAULT; 3259 return __f2fs_ioc_move_range(filp, &range); 3260 } 3261 3262 static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) 3263 { 3264 struct inode *inode = file_inode(filp); 3265 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3266 struct sit_info *sm = SIT_I(sbi); 3267 unsigned int start_segno = 0, end_segno = 0; 3268 unsigned int dev_start_segno = 0, dev_end_segno = 0; 3269 struct f2fs_flush_device range; 3270 struct f2fs_gc_control gc_control = { 3271 .init_gc_type = FG_GC, 3272 .should_migrate_blocks = true, 3273 .err_gc_skipped = true, 3274 .nr_free_secs = 0 }; 3275 int ret; 3276 3277 if (!capable(CAP_SYS_ADMIN)) 3278 return -EPERM; 3279 3280 if (f2fs_readonly(sbi->sb)) 3281 return -EROFS; 3282 3283 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) 3284 return -EINVAL; 3285 3286 if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, 3287 sizeof(range))) 3288 return -EFAULT; 3289 3290 if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || 3291 __is_large_section(sbi)) { 3292 f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1", 3293 range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi)); 3294 return -EINVAL; 3295 } 3296 3297 ret = mnt_want_write_file(filp); 3298 if (ret) 3299 return ret; 3300 3301 if (range.dev_num != 0) 3302 dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk); 3303 dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk); 3304 3305 start_segno = 
sm->last_victim[FLUSH_DEVICE]; 3306 if (start_segno < dev_start_segno || start_segno >= dev_end_segno) 3307 start_segno = dev_start_segno; 3308 end_segno = min(start_segno + range.segments, dev_end_segno); 3309 3310 while (start_segno < end_segno) { 3311 if (!f2fs_down_write_trylock(&sbi->gc_lock)) { 3312 ret = -EBUSY; 3313 goto out; 3314 } 3315 sm->last_victim[GC_CB] = end_segno + 1; 3316 sm->last_victim[GC_GREEDY] = end_segno + 1; 3317 sm->last_victim[ALLOC_NEXT] = end_segno + 1; 3318 3319 gc_control.victim_segno = start_segno; 3320 stat_inc_gc_call_count(sbi, FOREGROUND); 3321 ret = f2fs_gc(sbi, &gc_control); 3322 if (ret == -EAGAIN) 3323 ret = 0; 3324 else if (ret < 0) 3325 break; 3326 start_segno++; 3327 } 3328 out: 3329 mnt_drop_write_file(filp); 3330 return ret; 3331 } 3332 3333 static int f2fs_ioc_get_features(struct file *filp, unsigned long arg) 3334 { 3335 struct inode *inode = file_inode(filp); 3336 u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature); 3337 3338 /* Must validate to set it with SQLite behavior in Android. 
*/ 3339 sb_feature |= F2FS_FEATURE_ATOMIC_WRITE; 3340 3341 return put_user(sb_feature, (u32 __user *)arg); 3342 } 3343 3344 #ifdef CONFIG_QUOTA 3345 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) 3346 { 3347 struct dquot *transfer_to[MAXQUOTAS] = {}; 3348 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3349 struct super_block *sb = sbi->sb; 3350 int err; 3351 3352 transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); 3353 if (IS_ERR(transfer_to[PRJQUOTA])) 3354 return PTR_ERR(transfer_to[PRJQUOTA]); 3355 3356 err = __dquot_transfer(inode, transfer_to); 3357 if (err) 3358 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); 3359 dqput(transfer_to[PRJQUOTA]); 3360 return err; 3361 } 3362 3363 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) 3364 { 3365 struct f2fs_inode_info *fi = F2FS_I(inode); 3366 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3367 struct f2fs_inode *ri = NULL; 3368 kprojid_t kprojid; 3369 int err; 3370 3371 if (!f2fs_sb_has_project_quota(sbi)) { 3372 if (projid != F2FS_DEF_PROJID) 3373 return -EOPNOTSUPP; 3374 else 3375 return 0; 3376 } 3377 3378 if (!f2fs_has_extra_attr(inode)) 3379 return -EOPNOTSUPP; 3380 3381 kprojid = make_kprojid(&init_user_ns, (projid_t)projid); 3382 3383 if (projid_eq(kprojid, fi->i_projid)) 3384 return 0; 3385 3386 err = -EPERM; 3387 /* Is it quota file? 
Do not allow user to mess with it */ 3388 if (IS_NOQUOTA(inode)) 3389 return err; 3390 3391 if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) 3392 return -EOVERFLOW; 3393 3394 err = f2fs_dquot_initialize(inode); 3395 if (err) 3396 return err; 3397 3398 f2fs_lock_op(sbi); 3399 err = f2fs_transfer_project_quota(inode, kprojid); 3400 if (err) 3401 goto out_unlock; 3402 3403 fi->i_projid = kprojid; 3404 inode_set_ctime_current(inode); 3405 f2fs_mark_inode_dirty_sync(inode, true); 3406 out_unlock: 3407 f2fs_unlock_op(sbi); 3408 return err; 3409 } 3410 #else 3411 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) 3412 { 3413 return 0; 3414 } 3415 3416 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) 3417 { 3418 if (projid != F2FS_DEF_PROJID) 3419 return -EOPNOTSUPP; 3420 return 0; 3421 } 3422 #endif 3423 3424 int f2fs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) 3425 { 3426 struct inode *inode = d_inode(dentry); 3427 struct f2fs_inode_info *fi = F2FS_I(inode); 3428 u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags); 3429 3430 if (IS_ENCRYPTED(inode)) 3431 fsflags |= FS_ENCRYPT_FL; 3432 if (IS_VERITY(inode)) 3433 fsflags |= FS_VERITY_FL; 3434 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) 3435 fsflags |= FS_INLINE_DATA_FL; 3436 if (is_inode_flag_set(inode, FI_PIN_FILE)) 3437 fsflags |= FS_NOCOW_FL; 3438 3439 fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL); 3440 3441 if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) 3442 fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid); 3443 3444 return 0; 3445 } 3446 3447 int f2fs_fileattr_set(struct mnt_idmap *idmap, 3448 struct dentry *dentry, struct file_kattr *fa) 3449 { 3450 struct inode *inode = d_inode(dentry); 3451 u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL; 3452 u32 iflags; 3453 int err; 3454 3455 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 3456 return -EIO; 3457 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) 3458 return 
-ENOSPC; 3459 if (fsflags & ~F2FS_GETTABLE_FS_FL) 3460 return -EOPNOTSUPP; 3461 fsflags &= F2FS_SETTABLE_FS_FL; 3462 if (!fa->flags_valid) 3463 mask &= FS_COMMON_FL; 3464 3465 iflags = f2fs_fsflags_to_iflags(fsflags); 3466 if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) 3467 return -EOPNOTSUPP; 3468 3469 err = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(mask)); 3470 if (!err) 3471 err = f2fs_ioc_setproject(inode, fa->fsx_projid); 3472 3473 return err; 3474 } 3475 3476 int f2fs_pin_file_control(struct inode *inode, bool inc) 3477 { 3478 struct f2fs_inode_info *fi = F2FS_I(inode); 3479 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3480 3481 if (IS_DEVICE_ALIASING(inode)) 3482 return -EINVAL; 3483 3484 if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { 3485 f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", 3486 __func__, inode->i_ino, fi->i_gc_failures); 3487 clear_inode_flag(inode, FI_PIN_FILE); 3488 return -EAGAIN; 3489 } 3490 3491 /* Use i_gc_failures for normal file as a risk signal. 
*/ 3492 if (inc) 3493 f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); 3494 3495 return 0; 3496 } 3497 3498 static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) 3499 { 3500 struct inode *inode = file_inode(filp); 3501 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3502 __u32 pin; 3503 int ret = 0; 3504 3505 if (get_user(pin, (__u32 __user *)arg)) 3506 return -EFAULT; 3507 3508 if (!S_ISREG(inode->i_mode)) 3509 return -EINVAL; 3510 3511 if (f2fs_readonly(sbi->sb)) 3512 return -EROFS; 3513 3514 if (!pin && IS_DEVICE_ALIASING(inode)) 3515 return -EOPNOTSUPP; 3516 3517 ret = mnt_want_write_file(filp); 3518 if (ret) 3519 return ret; 3520 3521 inode_lock(inode); 3522 3523 if (f2fs_is_atomic_file(inode)) { 3524 ret = -EINVAL; 3525 goto out; 3526 } 3527 3528 if (!pin) { 3529 clear_inode_flag(inode, FI_PIN_FILE); 3530 f2fs_i_gc_failures_write(inode, 0); 3531 goto done; 3532 } else if (f2fs_is_pinned_file(inode)) { 3533 goto done; 3534 } 3535 3536 if (F2FS_HAS_BLOCKS(inode)) { 3537 ret = -EFBIG; 3538 goto out; 3539 } 3540 3541 /* Let's allow file pinning on zoned device. 
*/ 3542 if (!f2fs_sb_has_blkzoned(sbi) && 3543 f2fs_should_update_outplace(inode, NULL)) { 3544 ret = -EINVAL; 3545 goto out; 3546 } 3547 3548 if (f2fs_pin_file_control(inode, false)) { 3549 ret = -EAGAIN; 3550 goto out; 3551 } 3552 3553 ret = f2fs_convert_inline_inode(inode); 3554 if (ret) 3555 goto out; 3556 3557 if (!f2fs_disable_compressed_file(inode)) { 3558 ret = -EOPNOTSUPP; 3559 goto out; 3560 } 3561 3562 set_inode_flag(inode, FI_PIN_FILE); 3563 ret = F2FS_I(inode)->i_gc_failures; 3564 done: 3565 f2fs_update_time(sbi, REQ_TIME); 3566 out: 3567 inode_unlock(inode); 3568 mnt_drop_write_file(filp); 3569 return ret; 3570 } 3571 3572 static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) 3573 { 3574 struct inode *inode = file_inode(filp); 3575 __u32 pin = 0; 3576 3577 if (is_inode_flag_set(inode, FI_PIN_FILE)) 3578 pin = F2FS_I(inode)->i_gc_failures; 3579 return put_user(pin, (u32 __user *)arg); 3580 } 3581 3582 static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg) 3583 { 3584 return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 
1 : 0, 3585 (u32 __user *)arg); 3586 } 3587 3588 static int f2fs_ioc_io_prio(struct file *filp, unsigned long arg) 3589 { 3590 struct inode *inode = file_inode(filp); 3591 __u32 level; 3592 3593 if (get_user(level, (__u32 __user *)arg)) 3594 return -EFAULT; 3595 3596 if (!S_ISREG(inode->i_mode) || level >= F2FS_IOPRIO_MAX) 3597 return -EINVAL; 3598 3599 inode_lock(inode); 3600 F2FS_I(inode)->ioprio_hint = level; 3601 inode_unlock(inode); 3602 return 0; 3603 } 3604 3605 int f2fs_precache_extents(struct inode *inode) 3606 { 3607 struct f2fs_inode_info *fi = F2FS_I(inode); 3608 struct f2fs_map_blocks map; 3609 pgoff_t m_next_extent; 3610 loff_t end; 3611 int err; 3612 3613 if (is_inode_flag_set(inode, FI_NO_EXTENT)) 3614 return -EOPNOTSUPP; 3615 3616 map.m_lblk = 0; 3617 map.m_pblk = 0; 3618 map.m_next_pgofs = NULL; 3619 map.m_next_extent = &m_next_extent; 3620 map.m_seg_type = NO_CHECK_TYPE; 3621 map.m_may_create = false; 3622 end = F2FS_BLK_ALIGN(i_size_read(inode)); 3623 3624 while (map.m_lblk < end) { 3625 map.m_len = end - map.m_lblk; 3626 3627 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 3628 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE); 3629 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 3630 if (err || !map.m_len) 3631 return err; 3632 3633 map.m_lblk = m_next_extent; 3634 } 3635 3636 return 0; 3637 } 3638 3639 static int f2fs_ioc_precache_extents(struct file *filp) 3640 { 3641 return f2fs_precache_extents(file_inode(filp)); 3642 } 3643 3644 static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) 3645 { 3646 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); 3647 __u64 block_count; 3648 3649 if (!capable(CAP_SYS_ADMIN)) 3650 return -EPERM; 3651 3652 if (f2fs_readonly(sbi->sb)) 3653 return -EROFS; 3654 3655 if (copy_from_user(&block_count, (void __user *)arg, 3656 sizeof(block_count))) 3657 return -EFAULT; 3658 3659 return f2fs_resize_fs(filp, block_count); 3660 } 3661 3662 static int f2fs_ioc_enable_verity(struct file *filp, unsigned long 
arg) 3663 { 3664 struct inode *inode = file_inode(filp); 3665 3666 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 3667 3668 if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) { 3669 f2fs_warn(F2FS_I_SB(inode), 3670 "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem", 3671 inode->i_ino); 3672 return -EOPNOTSUPP; 3673 } 3674 3675 return fsverity_ioctl_enable(filp, (const void __user *)arg); 3676 } 3677 3678 static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg) 3679 { 3680 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) 3681 return -EOPNOTSUPP; 3682 3683 return fsverity_ioctl_measure(filp, (void __user *)arg); 3684 } 3685 3686 static int f2fs_ioc_read_verity_metadata(struct file *filp, unsigned long arg) 3687 { 3688 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) 3689 return -EOPNOTSUPP; 3690 3691 return fsverity_ioctl_read_metadata(filp, (const void __user *)arg); 3692 } 3693 3694 static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg) 3695 { 3696 struct inode *inode = file_inode(filp); 3697 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3698 char *vbuf; 3699 int count; 3700 int err = 0; 3701 3702 vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL); 3703 if (!vbuf) 3704 return -ENOMEM; 3705 3706 f2fs_down_read(&sbi->sb_lock); 3707 count = utf16s_to_utf8s(sbi->raw_super->volume_name, 3708 ARRAY_SIZE(sbi->raw_super->volume_name), 3709 UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME); 3710 f2fs_up_read(&sbi->sb_lock); 3711 3712 if (copy_to_user((char __user *)arg, vbuf, 3713 min(FSLABEL_MAX, count))) 3714 err = -EFAULT; 3715 3716 kfree(vbuf); 3717 return err; 3718 } 3719 3720 static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) 3721 { 3722 struct inode *inode = file_inode(filp); 3723 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3724 char *vbuf; 3725 int err = 0; 3726 3727 if (!capable(CAP_SYS_ADMIN)) 3728 return -EPERM; 3729 3730 vbuf = strndup_user((const char __user *)arg, 
FSLABEL_MAX); 3731 if (IS_ERR(vbuf)) 3732 return PTR_ERR(vbuf); 3733 3734 err = mnt_want_write_file(filp); 3735 if (err) 3736 goto out; 3737 3738 f2fs_down_write(&sbi->sb_lock); 3739 3740 memset(sbi->raw_super->volume_name, 0, 3741 sizeof(sbi->raw_super->volume_name)); 3742 utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN, 3743 sbi->raw_super->volume_name, 3744 ARRAY_SIZE(sbi->raw_super->volume_name)); 3745 3746 err = f2fs_commit_super(sbi, false); 3747 3748 f2fs_up_write(&sbi->sb_lock); 3749 3750 mnt_drop_write_file(filp); 3751 out: 3752 kfree(vbuf); 3753 return err; 3754 } 3755 3756 static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks) 3757 { 3758 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 3759 return -EOPNOTSUPP; 3760 3761 if (!f2fs_compressed_file(inode)) 3762 return -EINVAL; 3763 3764 *blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks); 3765 3766 return 0; 3767 } 3768 3769 static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg) 3770 { 3771 struct inode *inode = file_inode(filp); 3772 __u64 blocks; 3773 int ret; 3774 3775 ret = f2fs_get_compress_blocks(inode, &blocks); 3776 if (ret < 0) 3777 return ret; 3778 3779 return put_user(blocks, (u64 __user *)arg); 3780 } 3781 3782 static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) 3783 { 3784 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 3785 unsigned int released_blocks = 0; 3786 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 3787 block_t blkaddr; 3788 int i; 3789 3790 for (i = 0; i < count; i++) { 3791 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3792 dn->ofs_in_node + i); 3793 3794 if (!__is_valid_data_blkaddr(blkaddr)) 3795 continue; 3796 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, 3797 DATA_GENERIC_ENHANCE))) 3798 return -EFSCORRUPTED; 3799 } 3800 3801 while (count) { 3802 int compr_blocks = 0; 3803 3804 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { 3805 blkaddr = f2fs_data_blkaddr(dn); 3806 3807 if (i == 
0) { 3808 if (blkaddr == COMPRESS_ADDR) 3809 continue; 3810 dn->ofs_in_node += cluster_size; 3811 goto next; 3812 } 3813 3814 if (__is_valid_data_blkaddr(blkaddr)) 3815 compr_blocks++; 3816 3817 if (blkaddr != NEW_ADDR) 3818 continue; 3819 3820 f2fs_set_data_blkaddr(dn, NULL_ADDR); 3821 } 3822 3823 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false); 3824 dec_valid_block_count(sbi, dn->inode, 3825 cluster_size - compr_blocks); 3826 3827 released_blocks += cluster_size - compr_blocks; 3828 next: 3829 count -= cluster_size; 3830 } 3831 3832 return released_blocks; 3833 } 3834 3835 static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) 3836 { 3837 struct inode *inode = file_inode(filp); 3838 struct f2fs_inode_info *fi = F2FS_I(inode); 3839 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3840 pgoff_t page_idx = 0, last_idx; 3841 unsigned int released_blocks = 0; 3842 int ret; 3843 int writecount; 3844 3845 if (!f2fs_sb_has_compression(sbi)) 3846 return -EOPNOTSUPP; 3847 3848 if (f2fs_readonly(sbi->sb)) 3849 return -EROFS; 3850 3851 ret = mnt_want_write_file(filp); 3852 if (ret) 3853 return ret; 3854 3855 f2fs_balance_fs(sbi, true); 3856 3857 inode_lock(inode); 3858 3859 writecount = atomic_read(&inode->i_writecount); 3860 if ((filp->f_mode & FMODE_WRITE && writecount != 1) || 3861 (!(filp->f_mode & FMODE_WRITE) && writecount)) { 3862 ret = -EBUSY; 3863 goto out; 3864 } 3865 3866 if (!f2fs_compressed_file(inode) || 3867 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 3868 ret = -EINVAL; 3869 goto out; 3870 } 3871 3872 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 3873 if (ret) 3874 goto out; 3875 3876 if (!atomic_read(&fi->i_compr_blocks)) { 3877 ret = -EPERM; 3878 goto out; 3879 } 3880 3881 set_inode_flag(inode, FI_COMPRESS_RELEASED); 3882 inode_set_ctime_current(inode); 3883 f2fs_mark_inode_dirty_sync(inode, true); 3884 3885 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 3886 filemap_invalidate_lock(inode->i_mapping); 3887 
3888 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 3889 3890 while (page_idx < last_idx) { 3891 struct dnode_of_data dn; 3892 pgoff_t end_offset, count; 3893 3894 f2fs_lock_op(sbi); 3895 3896 set_new_dnode(&dn, inode, NULL, NULL, 0); 3897 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); 3898 if (ret) { 3899 f2fs_unlock_op(sbi); 3900 if (ret == -ENOENT) { 3901 page_idx = f2fs_get_next_page_offset(&dn, 3902 page_idx); 3903 ret = 0; 3904 continue; 3905 } 3906 break; 3907 } 3908 3909 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 3910 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); 3911 count = round_up(count, fi->i_cluster_size); 3912 3913 ret = release_compress_blocks(&dn, count); 3914 3915 f2fs_put_dnode(&dn); 3916 3917 f2fs_unlock_op(sbi); 3918 3919 if (ret < 0) 3920 break; 3921 3922 page_idx += count; 3923 released_blocks += ret; 3924 } 3925 3926 filemap_invalidate_unlock(inode->i_mapping); 3927 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 3928 out: 3929 if (released_blocks) 3930 f2fs_update_time(sbi, REQ_TIME); 3931 inode_unlock(inode); 3932 3933 mnt_drop_write_file(filp); 3934 3935 if (ret >= 0) { 3936 ret = put_user(released_blocks, (u64 __user *)arg); 3937 } else if (released_blocks && 3938 atomic_read(&fi->i_compr_blocks)) { 3939 set_sbi_flag(sbi, SBI_NEED_FSCK); 3940 f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " 3941 "iblocks=%llu, released=%u, compr_blocks=%u, " 3942 "run fsck to fix.", 3943 __func__, inode->i_ino, inode->i_blocks, 3944 released_blocks, 3945 atomic_read(&fi->i_compr_blocks)); 3946 } 3947 3948 return ret; 3949 } 3950 3951 static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, 3952 unsigned int *reserved_blocks) 3953 { 3954 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 3955 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 3956 block_t blkaddr; 3957 int i; 3958 3959 for (i = 0; i < count; i++) { 3960 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3961 
dn->ofs_in_node + i); 3962 3963 if (!__is_valid_data_blkaddr(blkaddr)) 3964 continue; 3965 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, 3966 DATA_GENERIC_ENHANCE))) 3967 return -EFSCORRUPTED; 3968 } 3969 3970 while (count) { 3971 int compr_blocks = 0; 3972 blkcnt_t reserved = 0; 3973 blkcnt_t to_reserved; 3974 int ret; 3975 3976 for (i = 0; i < cluster_size; i++) { 3977 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3978 dn->ofs_in_node + i); 3979 3980 if (i == 0) { 3981 if (blkaddr != COMPRESS_ADDR) { 3982 dn->ofs_in_node += cluster_size; 3983 goto next; 3984 } 3985 continue; 3986 } 3987 3988 /* 3989 * compressed cluster was not released due to it 3990 * fails in release_compress_blocks(), so NEW_ADDR 3991 * is a possible case. 3992 */ 3993 if (blkaddr == NEW_ADDR) { 3994 reserved++; 3995 continue; 3996 } 3997 if (__is_valid_data_blkaddr(blkaddr)) { 3998 compr_blocks++; 3999 continue; 4000 } 4001 } 4002 4003 to_reserved = cluster_size - compr_blocks - reserved; 4004 4005 /* for the case all blocks in cluster were reserved */ 4006 if (reserved && to_reserved == 1) { 4007 dn->ofs_in_node += cluster_size; 4008 goto next; 4009 } 4010 4011 ret = inc_valid_block_count(sbi, dn->inode, 4012 &to_reserved, false); 4013 if (unlikely(ret)) 4014 return ret; 4015 4016 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { 4017 if (f2fs_data_blkaddr(dn) == NULL_ADDR) 4018 f2fs_set_data_blkaddr(dn, NEW_ADDR); 4019 } 4020 4021 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); 4022 4023 *reserved_blocks += to_reserved; 4024 next: 4025 count -= cluster_size; 4026 } 4027 4028 return 0; 4029 } 4030 4031 static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) 4032 { 4033 struct inode *inode = file_inode(filp); 4034 struct f2fs_inode_info *fi = F2FS_I(inode); 4035 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4036 pgoff_t page_idx = 0, last_idx; 4037 unsigned int reserved_blocks = 0; 4038 int ret; 4039 4040 if (!f2fs_sb_has_compression(sbi)) 4041 
return -EOPNOTSUPP; 4042 4043 if (f2fs_readonly(sbi->sb)) 4044 return -EROFS; 4045 4046 ret = mnt_want_write_file(filp); 4047 if (ret) 4048 return ret; 4049 4050 f2fs_balance_fs(sbi, true); 4051 4052 inode_lock(inode); 4053 4054 if (!f2fs_compressed_file(inode) || 4055 !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4056 ret = -EINVAL; 4057 goto unlock_inode; 4058 } 4059 4060 if (atomic_read(&fi->i_compr_blocks)) 4061 goto unlock_inode; 4062 4063 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 4064 filemap_invalidate_lock(inode->i_mapping); 4065 4066 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4067 4068 while (page_idx < last_idx) { 4069 struct dnode_of_data dn; 4070 pgoff_t end_offset, count; 4071 4072 f2fs_lock_op(sbi); 4073 4074 set_new_dnode(&dn, inode, NULL, NULL, 0); 4075 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); 4076 if (ret) { 4077 f2fs_unlock_op(sbi); 4078 if (ret == -ENOENT) { 4079 page_idx = f2fs_get_next_page_offset(&dn, 4080 page_idx); 4081 ret = 0; 4082 continue; 4083 } 4084 break; 4085 } 4086 4087 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 4088 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); 4089 count = round_up(count, fi->i_cluster_size); 4090 4091 ret = reserve_compress_blocks(&dn, count, &reserved_blocks); 4092 4093 f2fs_put_dnode(&dn); 4094 4095 f2fs_unlock_op(sbi); 4096 4097 if (ret < 0) 4098 break; 4099 4100 page_idx += count; 4101 } 4102 4103 filemap_invalidate_unlock(inode->i_mapping); 4104 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 4105 4106 if (!ret) { 4107 clear_inode_flag(inode, FI_COMPRESS_RELEASED); 4108 inode_set_ctime_current(inode); 4109 f2fs_mark_inode_dirty_sync(inode, true); 4110 } 4111 unlock_inode: 4112 if (reserved_blocks) 4113 f2fs_update_time(sbi, REQ_TIME); 4114 inode_unlock(inode); 4115 mnt_drop_write_file(filp); 4116 4117 if (!ret) { 4118 ret = put_user(reserved_blocks, (u64 __user *)arg); 4119 } else if (reserved_blocks && 4120 atomic_read(&fi->i_compr_blocks)) { 4121 
set_sbi_flag(sbi, SBI_NEED_FSCK); 4122 f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx " 4123 "iblocks=%llu, reserved=%u, compr_blocks=%u, " 4124 "run fsck to fix.", 4125 __func__, inode->i_ino, inode->i_blocks, 4126 reserved_blocks, 4127 atomic_read(&fi->i_compr_blocks)); 4128 } 4129 4130 return ret; 4131 } 4132 4133 static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode, 4134 pgoff_t off, block_t block, block_t len, u32 flags) 4135 { 4136 sector_t sector = SECTOR_FROM_BLOCK(block); 4137 sector_t nr_sects = SECTOR_FROM_BLOCK(len); 4138 int ret = 0; 4139 4140 if (flags & F2FS_TRIM_FILE_DISCARD) { 4141 if (bdev_max_secure_erase_sectors(bdev)) 4142 ret = blkdev_issue_secure_erase(bdev, sector, nr_sects, 4143 GFP_NOFS); 4144 else 4145 ret = blkdev_issue_discard(bdev, sector, nr_sects, 4146 GFP_NOFS); 4147 } 4148 4149 if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) { 4150 if (IS_ENCRYPTED(inode)) 4151 ret = fscrypt_zeroout_range(inode, off, block, len); 4152 else 4153 ret = blkdev_issue_zeroout(bdev, sector, nr_sects, 4154 GFP_NOFS, 0); 4155 } 4156 4157 return ret; 4158 } 4159 4160 static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) 4161 { 4162 struct inode *inode = file_inode(filp); 4163 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4164 struct address_space *mapping = inode->i_mapping; 4165 struct block_device *prev_bdev = NULL; 4166 struct f2fs_sectrim_range range; 4167 pgoff_t index, pg_end, prev_index = 0; 4168 block_t prev_block = 0, len = 0; 4169 loff_t end_addr; 4170 bool to_end = false; 4171 int ret = 0; 4172 4173 if (!(filp->f_mode & FMODE_WRITE)) 4174 return -EBADF; 4175 4176 if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg, 4177 sizeof(range))) 4178 return -EFAULT; 4179 4180 if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) || 4181 !S_ISREG(inode->i_mode)) 4182 return -EINVAL; 4183 4184 if (((range.flags & F2FS_TRIM_FILE_DISCARD) && 4185 !f2fs_hw_support_discard(sbi)) || 4186 
((range.flags & F2FS_TRIM_FILE_ZEROOUT) && 4187 IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi))) 4188 return -EOPNOTSUPP; 4189 4190 ret = mnt_want_write_file(filp); 4191 if (ret) 4192 return ret; 4193 inode_lock(inode); 4194 4195 if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) || 4196 range.start >= inode->i_size) { 4197 ret = -EINVAL; 4198 goto err; 4199 } 4200 4201 if (range.len == 0) 4202 goto err; 4203 4204 if (inode->i_size - range.start > range.len) { 4205 end_addr = range.start + range.len; 4206 } else { 4207 end_addr = range.len == (u64)-1 ? 4208 sbi->sb->s_maxbytes : inode->i_size; 4209 to_end = true; 4210 } 4211 4212 if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) || 4213 (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) { 4214 ret = -EINVAL; 4215 goto err; 4216 } 4217 4218 index = F2FS_BYTES_TO_BLK(range.start); 4219 pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE); 4220 4221 ret = f2fs_convert_inline_inode(inode); 4222 if (ret) 4223 goto err; 4224 4225 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 4226 filemap_invalidate_lock(mapping); 4227 4228 ret = filemap_write_and_wait_range(mapping, range.start, 4229 to_end ? LLONG_MAX : end_addr - 1); 4230 if (ret) 4231 goto out; 4232 4233 truncate_inode_pages_range(mapping, range.start, 4234 to_end ? 
-1 : end_addr - 1); 4235 4236 while (index < pg_end) { 4237 struct dnode_of_data dn; 4238 pgoff_t end_offset, count; 4239 int i; 4240 4241 set_new_dnode(&dn, inode, NULL, NULL, 0); 4242 ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); 4243 if (ret) { 4244 if (ret == -ENOENT) { 4245 index = f2fs_get_next_page_offset(&dn, index); 4246 continue; 4247 } 4248 goto out; 4249 } 4250 4251 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 4252 count = min(end_offset - dn.ofs_in_node, pg_end - index); 4253 for (i = 0; i < count; i++, index++, dn.ofs_in_node++) { 4254 struct block_device *cur_bdev; 4255 block_t blkaddr = f2fs_data_blkaddr(&dn); 4256 4257 if (!__is_valid_data_blkaddr(blkaddr)) 4258 continue; 4259 4260 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, 4261 DATA_GENERIC_ENHANCE)) { 4262 ret = -EFSCORRUPTED; 4263 f2fs_put_dnode(&dn); 4264 goto out; 4265 } 4266 4267 cur_bdev = f2fs_target_device(sbi, blkaddr, NULL); 4268 if (f2fs_is_multi_device(sbi)) { 4269 int di = f2fs_target_device_index(sbi, blkaddr); 4270 4271 blkaddr -= FDEV(di).start_blk; 4272 } 4273 4274 if (len) { 4275 if (prev_bdev == cur_bdev && 4276 index == prev_index + len && 4277 blkaddr == prev_block + len) { 4278 len++; 4279 } else { 4280 ret = f2fs_secure_erase(prev_bdev, 4281 inode, prev_index, prev_block, 4282 len, range.flags); 4283 if (ret) { 4284 f2fs_put_dnode(&dn); 4285 goto out; 4286 } 4287 4288 len = 0; 4289 } 4290 } 4291 4292 if (!len) { 4293 prev_bdev = cur_bdev; 4294 prev_index = index; 4295 prev_block = blkaddr; 4296 len = 1; 4297 } 4298 } 4299 4300 f2fs_put_dnode(&dn); 4301 4302 if (fatal_signal_pending(current)) { 4303 ret = -EINTR; 4304 goto out; 4305 } 4306 cond_resched(); 4307 } 4308 4309 if (len) 4310 ret = f2fs_secure_erase(prev_bdev, inode, prev_index, 4311 prev_block, len, range.flags); 4312 f2fs_update_time(sbi, REQ_TIME); 4313 out: 4314 filemap_invalidate_unlock(mapping); 4315 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 4316 err: 4317 inode_unlock(inode); 4318 
mnt_drop_write_file(filp); 4319 4320 return ret; 4321 } 4322 4323 static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg) 4324 { 4325 struct inode *inode = file_inode(filp); 4326 struct f2fs_comp_option option; 4327 4328 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 4329 return -EOPNOTSUPP; 4330 4331 inode_lock_shared(inode); 4332 4333 if (!f2fs_compressed_file(inode)) { 4334 inode_unlock_shared(inode); 4335 return -ENODATA; 4336 } 4337 4338 option.algorithm = F2FS_I(inode)->i_compress_algorithm; 4339 option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size; 4340 4341 inode_unlock_shared(inode); 4342 4343 if (copy_to_user((struct f2fs_comp_option __user *)arg, &option, 4344 sizeof(option))) 4345 return -EFAULT; 4346 4347 return 0; 4348 } 4349 4350 static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) 4351 { 4352 struct inode *inode = file_inode(filp); 4353 struct f2fs_inode_info *fi = F2FS_I(inode); 4354 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4355 struct f2fs_comp_option option; 4356 int ret = 0; 4357 4358 if (!f2fs_sb_has_compression(sbi)) 4359 return -EOPNOTSUPP; 4360 4361 if (!(filp->f_mode & FMODE_WRITE)) 4362 return -EBADF; 4363 4364 if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg, 4365 sizeof(option))) 4366 return -EFAULT; 4367 4368 if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || 4369 option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || 4370 option.algorithm >= COMPRESS_MAX) 4371 return -EINVAL; 4372 4373 ret = mnt_want_write_file(filp); 4374 if (ret) 4375 return ret; 4376 inode_lock(inode); 4377 4378 f2fs_down_write(&F2FS_I(inode)->i_sem); 4379 if (!f2fs_compressed_file(inode)) { 4380 ret = -EINVAL; 4381 goto out; 4382 } 4383 4384 if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { 4385 ret = -EBUSY; 4386 goto out; 4387 } 4388 4389 if (F2FS_HAS_BLOCKS(inode)) { 4390 ret = -EFBIG; 4391 goto out; 4392 } 4393 4394 fi->i_compress_algorithm = option.algorithm; 4395 
fi->i_log_cluster_size = option.log_cluster_size; 4396 fi->i_cluster_size = BIT(option.log_cluster_size); 4397 /* Set default level */ 4398 if (fi->i_compress_algorithm == COMPRESS_ZSTD) 4399 fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; 4400 else 4401 fi->i_compress_level = 0; 4402 /* Adjust mount option level */ 4403 if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm && 4404 F2FS_OPTION(sbi).compress_level) 4405 fi->i_compress_level = F2FS_OPTION(sbi).compress_level; 4406 f2fs_mark_inode_dirty_sync(inode, true); 4407 4408 if (!f2fs_is_compress_backend_ready(inode)) 4409 f2fs_warn(sbi, "compression algorithm is successfully set, " 4410 "but current kernel doesn't support this algorithm."); 4411 out: 4412 f2fs_up_write(&fi->i_sem); 4413 inode_unlock(inode); 4414 mnt_drop_write_file(filp); 4415 4416 return ret; 4417 } 4418 4419 static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) 4420 { 4421 DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx); 4422 struct address_space *mapping = inode->i_mapping; 4423 struct folio *folio; 4424 pgoff_t redirty_idx = page_idx; 4425 int page_len = 0, ret = 0; 4426 4427 page_cache_ra_unbounded(&ractl, len, 0); 4428 4429 do { 4430 folio = read_cache_folio(mapping, page_idx, NULL, NULL); 4431 if (IS_ERR(folio)) { 4432 ret = PTR_ERR(folio); 4433 break; 4434 } 4435 page_len += folio_nr_pages(folio) - (page_idx - folio->index); 4436 page_idx = folio_next_index(folio); 4437 } while (page_len < len); 4438 4439 do { 4440 folio = filemap_lock_folio(mapping, redirty_idx); 4441 4442 /* It will never fail, when folio has pinned above */ 4443 f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(folio)); 4444 4445 f2fs_folio_wait_writeback(folio, DATA, true, true); 4446 4447 folio_mark_dirty(folio); 4448 folio_set_f2fs_gcing(folio); 4449 redirty_idx = folio_next_index(folio); 4450 folio_unlock(folio); 4451 folio_put_refs(folio, 2); 4452 } while (redirty_idx < page_idx); 4453 4454 return ret; 4455 } 4456 4457 static int 
f2fs_ioc_decompress_file(struct file *filp) 4458 { 4459 struct inode *inode = file_inode(filp); 4460 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4461 struct f2fs_inode_info *fi = F2FS_I(inode); 4462 pgoff_t page_idx = 0, last_idx, cluster_idx; 4463 int ret; 4464 4465 if (!f2fs_sb_has_compression(sbi) || 4466 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) 4467 return -EOPNOTSUPP; 4468 4469 if (!(filp->f_mode & FMODE_WRITE)) 4470 return -EBADF; 4471 4472 f2fs_balance_fs(sbi, true); 4473 4474 ret = mnt_want_write_file(filp); 4475 if (ret) 4476 return ret; 4477 inode_lock(inode); 4478 4479 if (!f2fs_is_compress_backend_ready(inode)) { 4480 ret = -EOPNOTSUPP; 4481 goto out; 4482 } 4483 4484 if (!f2fs_compressed_file(inode) || 4485 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4486 ret = -EINVAL; 4487 goto out; 4488 } 4489 4490 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 4491 if (ret) 4492 goto out; 4493 4494 if (!atomic_read(&fi->i_compr_blocks)) 4495 goto out; 4496 4497 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4498 last_idx >>= fi->i_log_cluster_size; 4499 4500 for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { 4501 page_idx = cluster_idx << fi->i_log_cluster_size; 4502 4503 if (!f2fs_is_compressed_cluster(inode, page_idx)) 4504 continue; 4505 4506 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); 4507 if (ret < 0) 4508 break; 4509 4510 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { 4511 ret = filemap_fdatawrite(inode->i_mapping); 4512 if (ret < 0) 4513 break; 4514 } 4515 4516 cond_resched(); 4517 if (fatal_signal_pending(current)) { 4518 ret = -EINTR; 4519 break; 4520 } 4521 } 4522 4523 if (!ret) 4524 ret = filemap_write_and_wait_range(inode->i_mapping, 0, 4525 LLONG_MAX); 4526 4527 if (ret) 4528 f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). 
Please delete the file.", 4529 __func__, ret); 4530 f2fs_update_time(sbi, REQ_TIME); 4531 out: 4532 inode_unlock(inode); 4533 mnt_drop_write_file(filp); 4534 4535 return ret; 4536 } 4537 4538 static int f2fs_ioc_compress_file(struct file *filp) 4539 { 4540 struct inode *inode = file_inode(filp); 4541 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4542 struct f2fs_inode_info *fi = F2FS_I(inode); 4543 pgoff_t page_idx = 0, last_idx, cluster_idx; 4544 int ret; 4545 4546 if (!f2fs_sb_has_compression(sbi) || 4547 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) 4548 return -EOPNOTSUPP; 4549 4550 if (!(filp->f_mode & FMODE_WRITE)) 4551 return -EBADF; 4552 4553 f2fs_balance_fs(sbi, true); 4554 4555 ret = mnt_want_write_file(filp); 4556 if (ret) 4557 return ret; 4558 inode_lock(inode); 4559 4560 if (!f2fs_is_compress_backend_ready(inode)) { 4561 ret = -EOPNOTSUPP; 4562 goto out; 4563 } 4564 4565 if (!f2fs_compressed_file(inode) || 4566 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4567 ret = -EINVAL; 4568 goto out; 4569 } 4570 4571 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 4572 if (ret) 4573 goto out; 4574 4575 set_inode_flag(inode, FI_ENABLE_COMPRESS); 4576 4577 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4578 last_idx >>= fi->i_log_cluster_size; 4579 4580 for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { 4581 page_idx = cluster_idx << fi->i_log_cluster_size; 4582 4583 if (f2fs_is_sparse_cluster(inode, page_idx)) 4584 continue; 4585 4586 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); 4587 if (ret < 0) 4588 break; 4589 4590 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { 4591 ret = filemap_fdatawrite(inode->i_mapping); 4592 if (ret < 0) 4593 break; 4594 } 4595 4596 cond_resched(); 4597 if (fatal_signal_pending(current)) { 4598 ret = -EINTR; 4599 break; 4600 } 4601 } 4602 4603 if (!ret) 4604 ret = filemap_write_and_wait_range(inode->i_mapping, 0, 4605 LLONG_MAX); 4606 4607 clear_inode_flag(inode, 
FI_ENABLE_COMPRESS); 4608 4609 if (ret) 4610 f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.", 4611 __func__, ret); 4612 f2fs_update_time(sbi, REQ_TIME); 4613 out: 4614 inode_unlock(inode); 4615 mnt_drop_write_file(filp); 4616 4617 return ret; 4618 } 4619 4620 static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 4621 { 4622 switch (cmd) { 4623 case FS_IOC_GETVERSION: 4624 return f2fs_ioc_getversion(filp, arg); 4625 case F2FS_IOC_START_ATOMIC_WRITE: 4626 return f2fs_ioc_start_atomic_write(filp, false); 4627 case F2FS_IOC_START_ATOMIC_REPLACE: 4628 return f2fs_ioc_start_atomic_write(filp, true); 4629 case F2FS_IOC_COMMIT_ATOMIC_WRITE: 4630 return f2fs_ioc_commit_atomic_write(filp); 4631 case F2FS_IOC_ABORT_ATOMIC_WRITE: 4632 return f2fs_ioc_abort_atomic_write(filp); 4633 case F2FS_IOC_START_VOLATILE_WRITE: 4634 case F2FS_IOC_RELEASE_VOLATILE_WRITE: 4635 return -EOPNOTSUPP; 4636 case F2FS_IOC_SHUTDOWN: 4637 return f2fs_ioc_shutdown(filp, arg); 4638 case FITRIM: 4639 return f2fs_ioc_fitrim(filp, arg); 4640 case FS_IOC_SET_ENCRYPTION_POLICY: 4641 return f2fs_ioc_set_encryption_policy(filp, arg); 4642 case FS_IOC_GET_ENCRYPTION_POLICY: 4643 return f2fs_ioc_get_encryption_policy(filp, arg); 4644 case FS_IOC_GET_ENCRYPTION_PWSALT: 4645 return f2fs_ioc_get_encryption_pwsalt(filp, arg); 4646 case FS_IOC_GET_ENCRYPTION_POLICY_EX: 4647 return f2fs_ioc_get_encryption_policy_ex(filp, arg); 4648 case FS_IOC_ADD_ENCRYPTION_KEY: 4649 return f2fs_ioc_add_encryption_key(filp, arg); 4650 case FS_IOC_REMOVE_ENCRYPTION_KEY: 4651 return f2fs_ioc_remove_encryption_key(filp, arg); 4652 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: 4653 return f2fs_ioc_remove_encryption_key_all_users(filp, arg); 4654 case FS_IOC_GET_ENCRYPTION_KEY_STATUS: 4655 return f2fs_ioc_get_encryption_key_status(filp, arg); 4656 case FS_IOC_GET_ENCRYPTION_NONCE: 4657 return f2fs_ioc_get_encryption_nonce(filp, arg); 4658 case 
F2FS_IOC_GARBAGE_COLLECT: 4659 return f2fs_ioc_gc(filp, arg); 4660 case F2FS_IOC_GARBAGE_COLLECT_RANGE: 4661 return f2fs_ioc_gc_range(filp, arg); 4662 case F2FS_IOC_WRITE_CHECKPOINT: 4663 return f2fs_ioc_write_checkpoint(filp); 4664 case F2FS_IOC_DEFRAGMENT: 4665 return f2fs_ioc_defragment(filp, arg); 4666 case F2FS_IOC_MOVE_RANGE: 4667 return f2fs_ioc_move_range(filp, arg); 4668 case F2FS_IOC_FLUSH_DEVICE: 4669 return f2fs_ioc_flush_device(filp, arg); 4670 case F2FS_IOC_GET_FEATURES: 4671 return f2fs_ioc_get_features(filp, arg); 4672 case F2FS_IOC_GET_PIN_FILE: 4673 return f2fs_ioc_get_pin_file(filp, arg); 4674 case F2FS_IOC_SET_PIN_FILE: 4675 return f2fs_ioc_set_pin_file(filp, arg); 4676 case F2FS_IOC_PRECACHE_EXTENTS: 4677 return f2fs_ioc_precache_extents(filp); 4678 case F2FS_IOC_RESIZE_FS: 4679 return f2fs_ioc_resize_fs(filp, arg); 4680 case FS_IOC_ENABLE_VERITY: 4681 return f2fs_ioc_enable_verity(filp, arg); 4682 case FS_IOC_MEASURE_VERITY: 4683 return f2fs_ioc_measure_verity(filp, arg); 4684 case FS_IOC_READ_VERITY_METADATA: 4685 return f2fs_ioc_read_verity_metadata(filp, arg); 4686 case FS_IOC_GETFSLABEL: 4687 return f2fs_ioc_getfslabel(filp, arg); 4688 case FS_IOC_SETFSLABEL: 4689 return f2fs_ioc_setfslabel(filp, arg); 4690 case F2FS_IOC_GET_COMPRESS_BLOCKS: 4691 return f2fs_ioc_get_compress_blocks(filp, arg); 4692 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: 4693 return f2fs_release_compress_blocks(filp, arg); 4694 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: 4695 return f2fs_reserve_compress_blocks(filp, arg); 4696 case F2FS_IOC_SEC_TRIM_FILE: 4697 return f2fs_sec_trim_file(filp, arg); 4698 case F2FS_IOC_GET_COMPRESS_OPTION: 4699 return f2fs_ioc_get_compress_option(filp, arg); 4700 case F2FS_IOC_SET_COMPRESS_OPTION: 4701 return f2fs_ioc_set_compress_option(filp, arg); 4702 case F2FS_IOC_DECOMPRESS_FILE: 4703 return f2fs_ioc_decompress_file(filp); 4704 case F2FS_IOC_COMPRESS_FILE: 4705 return f2fs_ioc_compress_file(filp); 4706 case F2FS_IOC_GET_DEV_ALIAS_FILE: 4707 
return f2fs_ioc_get_dev_alias_file(filp, arg);
	case F2FS_IOC_IO_PRIO:
		return f2fs_ioc_io_prio(filp, arg);
	default:
		return -ENOTTY;
	}
}

/*
 * f2fs_ioctl - native ioctl entry point (wired up as ->unlocked_ioctl in
 * f2fs_file_operations).
 *
 * Fails with -EIO when f2fs_cp_error() is set for the file's superblock info
 * and with -ENOSPC when f2fs_is_checkpoint_ready() returns false.  Otherwise
 * @cmd and @arg are passed to __f2fs_ioctl() and its result is returned.
 */
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
		return -EIO;
	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
		return -ENOSPC;

	return __f2fs_ioctl(filp, cmd, arg);
}

/*
 * Return %true if the given read or write request should use direct I/O, or
 * %false if it should use buffered I/O.
 */
static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
				struct iov_iter *iter)
{
	unsigned int align;

	/* The caller did not ask for direct I/O in the first place. */
	if (!(iocb->ki_flags & IOCB_DIRECT))
		return false;

	/* f2fs_force_buffered_io() vetoes direct I/O for this inode/direction. */
	if (f2fs_force_buffered_io(inode, iov_iter_rw(iter)))
		return false;

	/*
	 * Direct I/O not aligned to the disk's logical_block_size will be
	 * attempted, but will fail with -EINVAL.
	 *
	 * f2fs additionally requires that direct I/O be aligned to the
	 * filesystem block size, which is often a stricter requirement.
	 * However, f2fs traditionally falls back to buffered I/O on requests
	 * that are logical_block_size-aligned but not fs-block aligned.
	 *
	 * The below logic implements this behavior.
	 */
	/*
	 * OR the file position with the value reported by
	 * iov_iter_alignment() so that a single IS_ALIGNED() test covers both.
	 */
	align = iocb->ki_pos | iov_iter_alignment(iter);
	if (!IS_ALIGNED(align, i_blocksize(inode)) &&
	    IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
		return false;

	return true;
}

/*
 * ->end_io hook for direct reads (installed through f2fs_iomap_dio_read_ops).
 *
 * Always drops the F2FS_DIO_READ count that f2fs_dio_read_iter() took.  A
 * non-zero @error is returned unchanged; otherwise @size is accounted to the
 * APP_DIRECT_READ_IO iostat and 0 is returned.  @flags is not used.
 */
static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
				unsigned int flags)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));

	dec_page_count(sbi, F2FS_DIO_READ);
	if (error)
		return error;
	f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size);
	return 0;
}

/* iomap direct-I/O callbacks used by f2fs_dio_read_iter(). */
static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
	.end_io = f2fs_dio_read_end_io,
};

/*
 * Direct-I/O read path, called from f2fs_file_read_iter().
 *
 * A zero-length request returns 0 right away.  Otherwise i_gc_rwsem[READ] is
 * read-locked (trylock only under IOCB_NOWAIT, giving -EAGAIN on contention)
 * and held until the iomap call sequence below has returned.  Atomic files
 * are refused with -EOPNOTSUPP.
 *
 * Returns the result of iomap_dio_complete(), or the value decoded from
 * __iomap_dio_rw() when that call returned an error pointer or NULL.
 */
static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	const loff_t pos = iocb->ki_pos;
	const size_t count = iov_iter_count(to);
	struct iomap_dio *dio;
	ssize_t ret;

	if (count == 0)
		return 0; /* skip atime update */

	trace_f2fs_direct_IO_enter(inode, iocb, count, READ);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
			ret = -EAGAIN;
			goto out;
		}
	} else {
		f2fs_down_read(&fi->i_gc_rwsem[READ]);
	}

	/* dio is not compatible w/ atomic file */
	if (f2fs_is_atomic_file(inode)) {
		f2fs_up_read(&fi->i_gc_rwsem[READ]);
		ret = -EOPNOTSUPP;
		goto out;
	}

	/*
	 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
	 * the higher-level function iomap_dio_rw() in order to ensure that the
	 * F2FS_DIO_READ counter will be decremented correctly in all cases.
	 */
	inc_page_count(sbi, F2FS_DIO_READ);
	dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
			     &f2fs_iomap_dio_read_ops, 0, NULL, 0);
	if (IS_ERR_OR_NULL(dio)) {
		ret = PTR_ERR_OR_ZERO(dio);
		/*
		 * Undo the increment here unless the request was queued;
		 * presumably a queued request still reaches
		 * f2fs_dio_read_end_io(), which drops the count itself.
		 */
		if (ret != -EIOCBQUEUED)
			dec_page_count(sbi, F2FS_DIO_READ);
	} else {
		ret = iomap_dio_complete(dio);
	}

	f2fs_up_read(&fi->i_gc_rwsem[READ]);

	file_accessed(file);
out:
	trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
	return ret;
}

/*
 * Emit the f2fs_datawrite_start (@rw == WRITE) or f2fs_dataread_start
 * (any other @rw) tracepoint, including the file's path, the current pid and
 * the current command name.
 *
 * The path is built into a buffer obtained from f2fs_getname().  If that
 * allocation fails or dentry_path_raw() returns an error, nothing is traced.
 * The buffer is always released with f2fs_putname().
 */
static void f2fs_trace_rw_file_path(struct file *file, loff_t pos, size_t count,
				    int rw)
{
	struct inode *inode = file_inode(file);
	char *buf, *path;

	buf = f2fs_getname(F2FS_I_SB(inode));
	if (!buf)
		return;
	path = dentry_path_raw(file_dentry(file), buf, PATH_MAX);
	if (IS_ERR(path))
		goto free_buf;
	if (rw == WRITE)
		trace_f2fs_datawrite_start(inode, pos, count,
				current->pid, path, current->comm);
	else
		trace_f2fs_dataread_start(inode, pos, count,
				current->pid, path, current->comm);
free_buf:
	f2fs_putname(buf);
}

/*
 * ->read_iter implementation (see f2fs_file_operations).
 *
 * Returns -EOPNOTSUPP when f2fs_is_compress_backend_ready() is false.
 * Otherwise chooses between f2fs_dio_read_iter() and filemap_read() based on
 * f2fs_should_use_dio(); positive buffered results are accounted to
 * APP_BUFFERED_READ_IO.  Returns whatever the chosen read path returned.
 */
static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	const loff_t pos = iocb->ki_pos;
	ssize_t ret;
	bool dio;

	if (!f2fs_is_compress_backend_ready(inode))
		return -EOPNOTSUPP;

	/* Only build the path string when the tracepoint is enabled. */
	if (trace_f2fs_dataread_start_enabled())
		f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
					iov_iter_count(to), READ);

	dio = f2fs_should_use_dio(inode, iocb, to);

	/* In LFS mode, if there is inflight dio, wait for its completion */
	/* The wait is skipped only for a direct read of a pinned file. */
	if (f2fs_lfs_mode(F2FS_I_SB(inode)) &&
	    get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) &&
	    (!f2fs_is_pinned_file(inode) || !dio))
		inode_dio_wait(inode);

	if (dio) {
		ret = f2fs_dio_read_iter(iocb, to);
	} else {
		ret = filemap_read(iocb, to, 0);
		if (ret > 0)
			f2fs_update_iostat(F2FS_I_SB(inode), inode,
						APP_BUFFERED_READ_IO, ret);
	}
	/* @pos was sampled on entry, before the read paths ran. */
	trace_f2fs_dataread_end(inode, pos, ret);
	return ret;
}

/*
 * ->splice_read implementation (see f2fs_file_operations).
 *
 * Returns -EOPNOTSUPP when f2fs_is_compress_backend_ready() is false.
 * Otherwise wraps filemap_splice_read(), accounting positive results to
 * APP_BUFFERED_READ_IO and emitting the dataread start/end tracepoints with
 * the position sampled from *@ppos on entry.
 */
static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos,
				     struct pipe_inode_info *pipe,
				     size_t len, unsigned int flags)
{
	struct inode *inode = file_inode(in);
	const loff_t pos = *ppos;
	ssize_t ret;

	if (!f2fs_is_compress_backend_ready(inode))
		return -EOPNOTSUPP;

	if (trace_f2fs_dataread_start_enabled())
		f2fs_trace_rw_file_path(in, pos, len, READ);

	ret = filemap_splice_read(in, ppos, pipe, len, flags);
	if (ret > 0)
		f2fs_update_iostat(F2FS_I_SB(inode), inode,
				   APP_BUFFERED_READ_IO, ret);

	trace_f2fs_dataread_end(inode, pos, ret);
	return ret;
}

/*
 * Pre-write validation, called from f2fs_file_write_iter().
 *
 * Returns -EPERM for an immutable inode or one with FI_COMPRESS_RELEASED
 * set, the result of generic_write_checks() when that is <= 0, or the error
 * from file_modified().  On success, f2fs_zero_post_eof_page() is called for
 * the end offset of the write (with its lock argument set to true) and the
 * byte count reported by generic_write_checks() is returned.
 */
static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	ssize_t count;
	int err;

	if (IS_IMMUTABLE(inode))
		return -EPERM;

	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
		return -EPERM;

	count = generic_write_checks(iocb, from);
	if (count <= 0)
		return count;

	err = file_modified(file);
	if (err)
		return err;

	f2fs_zero_post_eof_page(inode,
		iocb->ki_pos + iov_iter_count(from), true);
	return count;
}

/*
 * Preallocate blocks for a write request, if it is possible and helpful to do
 * so.  Returns a positive number if blocks may have been preallocated, 0 if no
 * blocks were preallocated, or a negative errno value if something went
 * seriously wrong.  Also sets FI_PREALLOCATED_ALL on the inode if *all* the
 * requested blocks (not just some of them) have been allocated.
 */
static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
				   bool dio)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	const loff_t pos = iocb->ki_pos;
	const size_t count = iov_iter_count(iter);
	struct f2fs_map_blocks map = {};
	int flag;
	int ret;

	/* If it will be an out-of-place direct write, don't bother. */
	if (dio && f2fs_lfs_mode(sbi))
		return 0;
	/*
	 * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
	 * buffered IO, if DIO meets any holes.
	 */
	if (dio && i_size_read(inode) &&
		(F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
		return 0;

	/* No-wait I/O can't allocate blocks. */
	if (iocb->ki_flags & IOCB_NOWAIT)
		return 0;

	/* If it will be a short write, don't bother. */
	if (fault_in_iov_iter_readable(iter, count))
		return 0;

	if (f2fs_has_inline_data(inode)) {
		/* If the data will fit inline, don't bother. */
		if (pos + count <= MAX_INLINE_DATA(inode))
			return 0;
		ret = f2fs_convert_inline_inode(inode);
		if (ret)
			return ret;
	}

	/* Do not preallocate blocks that will be written partially in 4KB. */
	/*
	 * m_len temporarily holds the end block index; it becomes a length
	 * once m_lblk is subtracted.  If the end does not lie beyond the
	 * start block there is nothing to preallocate.
	 */
	map.m_lblk = F2FS_BLK_ALIGN(pos);
	map.m_len = F2FS_BYTES_TO_BLK(pos + count);
	if (map.m_len > map.m_lblk)
		map.m_len -= map.m_lblk;
	else
		return 0;

	/* Creation is requested for everything except device-aliasing inodes. */
	if (!IS_DEVICE_ALIASING(inode))
		map.m_may_create = true;
	if (dio) {
		map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi,
						inode->i_write_hint);
		flag = F2FS_GET_BLOCK_PRE_DIO;
	} else {
		map.m_seg_type = NO_CHECK_TYPE;
		flag = F2FS_GET_BLOCK_PRE_AIO;
	}

	ret = f2fs_map_blocks(inode, &map, flag);
	/* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */
	if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0))
		return ret;
	if (ret == 0)
		set_inode_flag(inode, FI_PREALLOCATED_ALL);
	return map.m_len;
}

/*
 * Buffered write path.
 *
 * IOCB_NOWAIT requests are rejected with -EOPNOTSUPP.  Otherwise the data is
 * written with generic_perform_write(); a positive byte count is accounted to
 * the APP_BUFFERED_IO iostat.  Returns the generic_perform_write() result.
 */
static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
					struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	ssize_t ret;

	if (iocb->ki_flags & IOCB_NOWAIT)
		return -EOPNOTSUPP;

	ret = generic_perform_write(iocb, from);

	if (ret > 0) {
		f2fs_update_iostat(F2FS_I_SB(inode), inode,
						APP_BUFFERED_IO, ret);
	}
	return ret;
}

/*
 * ->end_io hook for direct writes (installed through
 * f2fs_iomap_dio_write_ops).
 *
 * Always drops the F2FS_DIO_WRITE count that f2fs_dio_write_iter() took.  A
 * non-zero @error is returned unchanged; otherwise REQ_TIME is refreshed,
 * @size is accounted to the APP_DIRECT_IO iostat and 0 is returned.  @flags
 * is not used.
 */
static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
					unsigned int flags)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));

	dec_page_count(sbi, F2FS_DIO_WRITE);
	if (error)
		return error;
	f2fs_update_time(sbi, REQ_TIME);
	f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size);
	return 0;
}

/*
 * ->submit_io hook for direct writes.
 *
 * Derives a write hint for @bio from the inode's i_write_hint (mapped to a
 * segment type, then to a temperature, then back to a hint for DATA I/O) and
 * submits the bio with blk_crypto_submit_bio().  @file_offset is not used.
 */
static void f2fs_dio_write_submit_io(const struct iomap_iter *iter,
					struct bio *bio, loff_t file_offset)
{
	struct inode *inode = iter->inode;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	enum log_type type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint);
	enum temp_type temp = f2fs_get_segment_temp(sbi, type);

	bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp);
	blk_crypto_submit_bio(bio);
}

/* iomap direct-I/O callbacks used by f2fs_dio_write_iter(). */
static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
	.end_io		= f2fs_dio_write_end_io,
	.submit_io	= f2fs_dio_write_submit_io,
};

/*
 * Write back and then drop page-cache pages for a byte range.
 *
 * Callers in this file pass @end_pos as the offset of the last byte written
 * (start + length - 1).  If filemap_write_and_wait_range() fails, the
 * function returns without invalidating anything; the error is not reported
 * to the caller.
 */
static void f2fs_flush_buffered_write(struct address_space *mapping,
				      loff_t start_pos, loff_t end_pos)
{
	int ret;

	ret = filemap_write_and_wait_range(mapping, start_pos, end_pos);
	if (ret < 0)
		return;
	invalidate_mapping_pages(mapping,
				 start_pos >> PAGE_SHIFT,
				 end_pos >> PAGE_SHIFT);
}

/*
 * Direct-I/O write path, called from f2fs_file_write_iter().
 *
 * @may_need_sync: set to false when nothing is left in @from after the
 *                 direct write, so the caller skips generic_write_sync().
 *
 * Locking: i_gc_rwsem[WRITE] is read-locked first, then i_gc_rwsem[READ] when
 * f2fs_lfs_mode() is true; they are released in the reverse order.  Under
 * IOCB_NOWAIT only trylocks are used, and the request fails with -EAGAIN when
 * the inode still has inline data, when f2fs_overwrite_io() is false, or when
 * a lock is contended.
 *
 * If bytes remain in @from after the direct write, they are written through
 * f2fs_buffered_write_iter() and the affected range is flushed and
 * invalidated.
 *
 * Returns the number of bytes written (direct part plus any buffered
 * fallback), or a negative errno.  If the buffered fallback fails, the result
 * of the direct part is returned.
 */
static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
				   bool *may_need_sync)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	const bool do_opu = f2fs_lfs_mode(sbi);
	const loff_t pos = iocb->ki_pos;
	const ssize_t count = iov_iter_count(from);
	unsigned int dio_flags;
	struct iomap_dio *dio;
	ssize_t ret;

	trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		/* f2fs_convert_inline_inode() and block allocation can block */
		if (f2fs_has_inline_data(inode) ||
		    !f2fs_overwrite_io(inode, pos, count)) {
			ret = -EAGAIN;
			goto out;
		}

		if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
			ret = -EAGAIN;
			goto out;
		}
		if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
			/* Back out the WRITE lock taken just above. */
			f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
			ret = -EAGAIN;
			goto out;
		}
	} else {
		ret = f2fs_convert_inline_inode(inode);
		if (ret)
			goto out;

		f2fs_down_read(&fi->i_gc_rwsem[WRITE]);
		if (do_opu)
			f2fs_down_read(&fi->i_gc_rwsem[READ]);
	}

	/*
	 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
	 * the higher-level function iomap_dio_rw() in order to ensure that the
	 * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
	 */
	inc_page_count(sbi, F2FS_DIO_WRITE);
	dio_flags = 0;
	/* A write that ends beyond the current i_size is forced to wait. */
	if (pos + count > inode->i_size)
		dio_flags |= IOMAP_DIO_FORCE_WAIT;
	dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
			     &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
	if (IS_ERR_OR_NULL(dio)) {
		ret = PTR_ERR_OR_ZERO(dio);
		/*
		 * -ENOTBLK is turned into "zero bytes written directly"; the
		 * bytes still left in @from are then handled by the buffered
		 * fallback below.
		 */
		if (ret == -ENOTBLK)
			ret = 0;
		/*
		 * Undo the increment here unless the request was queued;
		 * presumably a queued request still reaches
		 * f2fs_dio_write_end_io(), which drops the count itself.
		 */
		if (ret != -EIOCBQUEUED)
			dec_page_count(sbi, F2FS_DIO_WRITE);
	} else {
		ret = iomap_dio_complete(dio);
	}

	if (do_opu)
		f2fs_up_read(&fi->i_gc_rwsem[READ]);
	f2fs_up_read(&fi->i_gc_rwsem[WRITE]);

	if (ret < 0)
		goto out;
	/* Extend i_size when the direct write went past it. */
	if (pos + ret > inode->i_size)
		f2fs_i_size_write(inode, pos + ret);
	if (!do_opu)
		set_inode_flag(inode, FI_UPDATE_WRITE);

	if (iov_iter_count(from)) {
		ssize_t ret2;
		loff_t bufio_start_pos = iocb->ki_pos;

		/*
		 * The direct write was partial, so we need to fall back to a
		 * buffered write for the remainder.
		 */

		ret2 = f2fs_buffered_write_iter(iocb, from);
		/* Data still left over: let f2fs_write_failed() clean up. */
		if (iov_iter_count(from))
			f2fs_write_failed(inode, iocb->ki_pos);
		/* Keep ret as the direct-write result on fallback failure. */
		if (ret2 < 0)
			goto out;

		/*
		 * Ensure that the pagecache pages are written to disk and
		 * invalidated to preserve the expected O_DIRECT semantics.
		 */
		if (ret2 > 0) {
			loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;

			ret += ret2;

			f2fs_flush_buffered_write(file->f_mapping,
						  bufio_start_pos,
						  bufio_end_pos);
		}
	} else {
		/* iomap_dio_rw() already handled the generic_write_sync(). */
		*may_need_sync = false;
	}
out:
	trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
	return ret;
}

/*
 * ->write_iter implementation (see f2fs_file_operations).
 *
 * Sequence: reject on checkpoint error (-EIO) or an unready compress backend
 * (-EOPNOTSUPP); take the inode lock (trylock under IOCB_NOWAIT, -EAGAIN on
 * contention); refuse non-overwrite writes to pinned files (-EIO); run
 * f2fs_write_checks(); decide between direct and buffered I/O; refuse direct
 * I/O on atomic files (-EOPNOTSUPP); optionally preallocate blocks; perform
 * the write; truncate preallocated blocks left beyond i_size; drop the inode
 * lock; finally sync and/or flush the page cache as needed.
 *
 * Returns the number of bytes written or a negative errno.
 */
static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	const loff_t orig_pos = iocb->ki_pos;
	const size_t orig_count = iov_iter_count(from);
	loff_t target_size;
	bool dio;
	bool may_need_sync = true;
	int preallocated;
	const loff_t pos = iocb->ki_pos;
	const ssize_t count = iov_iter_count(from);
	ssize_t ret;

	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
		ret = -EIO;
		goto out;
	}

	if (!f2fs_is_compress_backend_ready(inode)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock(inode)) {
			ret = -EAGAIN;
			goto out;
		}
	} else {
		inode_lock(inode);
	}

	/* A pinned file only accepts writes that f2fs_overwrite_io() allows. */
	if (f2fs_is_pinned_file(inode) &&
		!f2fs_overwrite_io(inode, pos, count)) {
		ret = -EIO;
		goto out_unlock;
	}

	ret = f2fs_write_checks(iocb, from);
	if (ret <= 0)
		goto out_unlock;

	/* Determine whether we will do a direct write or a buffered write. */
	dio = f2fs_should_use_dio(inode, iocb, from);

	/* dio is not compatible w/ atomic write */
	if (dio && f2fs_is_atomic_file(inode)) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	/* Possibly preallocate the blocks for the write. */
	/* target_size is taken after f2fs_write_checks() has run. */
	target_size = iocb->ki_pos + iov_iter_count(from);
	preallocated = f2fs_preallocate_blocks(iocb, from, dio);
	if (preallocated < 0) {
		ret = preallocated;
	} else {
		if (trace_f2fs_datawrite_start_enabled())
			f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
						orig_count, WRITE);

		/* Do the actual write. */
		ret = dio ?
			f2fs_dio_write_iter(iocb, from, &may_need_sync) :
			f2fs_buffered_write_iter(iocb, from);

		trace_f2fs_datawrite_end(inode, orig_pos, ret);
	}

	/* Don't leave any preallocated blocks around past i_size. */
	/* This also runs when preallocation itself failed (negative value). */
	if (preallocated && i_size_read(inode) < target_size) {
		f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		filemap_invalidate_lock(inode->i_mapping);
		if (!f2fs_truncate(inode))
			file_dont_truncate(inode);
		filemap_invalidate_unlock(inode->i_mapping);
		f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	} else {
		file_dont_truncate(inode);
	}

	clear_inode_flag(inode, FI_PREALLOCATED_ALL);
out_unlock:
	inode_unlock(inode);
out:
	trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);

	if (ret > 0 && may_need_sync)
		ret = generic_write_sync(iocb, ret);

	/* If buffered IO was forced, flush and drop the data from
	 * the page cache to preserve O_DIRECT semantics
	 */
	/*
	 * NOTE(review): every jump to out/out_unlock taken before dio is
	 * assigned carries ret <= 0, so the "ret > 0" test short-circuits
	 * before dio is read.
	 */
	if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT))
		f2fs_flush_buffered_write(iocb->ki_filp->f_mapping,
					  orig_pos,
					  orig_pos + ret - 1);

	return ret;
}

/*
 * ->fadvise implementation (see f2fs_file_operations).
 *
 * POSIX_FADV_SEQUENTIAL is handled entirely here: -ESPIPE for a FIFO,
 * -EINVAL for a missing mapping or negative @len, otherwise the readahead
 * window is set to the bdi's ra_pages multiplied by seq_file_ra_mul,
 * FMODE_RANDOM is cleared under f_lock, and 0 is returned.
 *
 * POSIX_FADV_WILLNEED at offset 0 first calls f2fs_precache_extents().  All
 * other advice goes through generic_fadvise(); on success,
 * POSIX_FADV_DONTNEED additionally invalidates compress pages for compressed
 * files when the COMPRESS_CACHE option is set, and POSIX_FADV_NOREUSE calls
 * f2fs_keep_noreuse_range() and returns its result.
 */
static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
		int advice)
{
	struct address_space *mapping;
	struct backing_dev_info *bdi;
	struct inode *inode = file_inode(filp);
	int err;

	trace_f2fs_fadvise(inode, offset, len, advice);

	if (advice == POSIX_FADV_SEQUENTIAL) {
		if (S_ISFIFO(inode->i_mode))
			return -ESPIPE;

		mapping = filp->f_mapping;
		if (!mapping || len < 0)
			return -EINVAL;

		bdi = inode_to_bdi(mapping->host);
		filp->f_ra.ra_pages = bdi->ra_pages *
			F2FS_I_SB(inode)->seq_file_ra_mul;
		spin_lock(&filp->f_lock);
		filp->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&filp->f_lock);
		return 0;
	} else if (advice == POSIX_FADV_WILLNEED && offset == 0) {
		/* Load extent cache at the first readahead. */
		f2fs_precache_extents(inode);
	}

	err = generic_fadvise(filp, offset, len, advice);
	if (err)
		return err;

	if (advice == POSIX_FADV_DONTNEED &&
	    (test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
	     f2fs_compressed_file(inode)))
		f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino);
	else if (advice == POSIX_FADV_NOREUSE)
		err = f2fs_keep_noreuse_range(inode, offset, len);
	return err;
}

#ifdef CONFIG_COMPAT
/*
 * Argument layout of the gc-range ioctl as passed by compat callers; it is
 * copied field by field into a struct f2fs_gc_range below.
 */
struct compat_f2fs_gc_range {
	u32 sync;
	compat_u64 start;
	compat_u64 len;
};
#define F2FS_IOC32_GARBAGE_COLLECT_RANGE	_IOW(F2FS_IOCTL_MAGIC, 11,\
						struct compat_f2fs_gc_range)

/*
 * Compat handler for F2FS_IOC32_GARBAGE_COLLECT_RANGE.
 *
 * Reads each field from the user structure with get_user(); any failure
 * yields -EFAULT.  Otherwise returns the result of __f2fs_ioc_gc_range().
 */
static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg)
{
	struct compat_f2fs_gc_range __user *urange;
	struct f2fs_gc_range range;
	int err;

	urange = compat_ptr(arg);
	/* Errors are OR-ed together; only "any failure" matters. */
	err = get_user(range.sync, &urange->sync);
	err |= get_user(range.start, &urange->start);
	err |= get_user(range.len, &urange->len);
	if (err)
		return -EFAULT;

	return __f2fs_ioc_gc_range(file, &range);
}

/*
 * Argument layout of the move-range ioctl as passed by compat callers; it is
 * copied field by field into a struct f2fs_move_range below.
 */
struct compat_f2fs_move_range {
	u32 dst_fd;
	compat_u64 pos_in;
	compat_u64 pos_out;
	compat_u64 len;
};
#define F2FS_IOC32_MOVE_RANGE		_IOWR(F2FS_IOCTL_MAGIC, 9,	\
					struct compat_f2fs_move_range)

/*
 * Compat handler for F2FS_IOC32_MOVE_RANGE.
 *
 * Reads each field from the user structure with get_user(); any failure
 * yields -EFAULT.  Otherwise returns the result of __f2fs_ioc_move_range().
 * NOTE(review): nothing is copied back to the user structure here — confirm
 * that is intended for an _IOWR command.
 */
static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg)
{
	struct compat_f2fs_move_range __user *urange;
	struct f2fs_move_range range;
	int err;

	urange = compat_ptr(arg);
	/* Errors are OR-ed together; only "any failure" matters. */
	err = get_user(range.dst_fd, &urange->dst_fd);
	err |= get_user(range.pos_in, &urange->pos_in);
	err |= get_user(range.pos_out, &urange->pos_out);
	err |= get_user(range.len, &urange->len);
	if (err)
		return -EFAULT;

	return __f2fs_ioc_move_range(file, &range);
}

/*
 * f2fs_compat_ioctl - compat ioctl entry point (wired up as ->compat_ioctl
 * in f2fs_file_operations).
 *
 * Applies the same -EIO / -ENOSPC gating as f2fs_ioctl().  Then:
 *  - FS_IOC32_GETVERSION is rewritten to FS_IOC_GETVERSION;
 *  - the two F2FS_IOC32_* range commands are served by the compat helpers;
 *  - the other listed commands are passed through unchanged;
 *  - anything else returns -ENOIOCTLCMD.
 * Commands that fall out of the switch are dispatched to __f2fs_ioctl() with
 * @arg converted through compat_ptr().
 */
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
		return -EIO;
	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file))))
		return -ENOSPC;

	switch (cmd) {
	case FS_IOC32_GETVERSION:
		cmd = FS_IOC_GETVERSION;
		break;
	case F2FS_IOC32_GARBAGE_COLLECT_RANGE:
		return f2fs_compat_ioc_gc_range(file, arg);
	case F2FS_IOC32_MOVE_RANGE:
		return f2fs_compat_ioc_move_range(file, arg);
	case F2FS_IOC_START_ATOMIC_WRITE:
	case F2FS_IOC_START_ATOMIC_REPLACE:
	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
	case F2FS_IOC_START_VOLATILE_WRITE:
	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
	case F2FS_IOC_ABORT_ATOMIC_WRITE:
	case F2FS_IOC_SHUTDOWN:
	case FITRIM:
	case FS_IOC_SET_ENCRYPTION_POLICY:
	case FS_IOC_GET_ENCRYPTION_PWSALT:
	case FS_IOC_GET_ENCRYPTION_POLICY:
	case FS_IOC_GET_ENCRYPTION_POLICY_EX:
	case FS_IOC_ADD_ENCRYPTION_KEY:
	case FS_IOC_REMOVE_ENCRYPTION_KEY:
	case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
	case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
	case FS_IOC_GET_ENCRYPTION_NONCE:
	case F2FS_IOC_GARBAGE_COLLECT:
	case F2FS_IOC_WRITE_CHECKPOINT:
	case F2FS_IOC_DEFRAGMENT:
	case F2FS_IOC_FLUSH_DEVICE:
	case F2FS_IOC_GET_FEATURES:
	case F2FS_IOC_GET_PIN_FILE:
	case F2FS_IOC_SET_PIN_FILE:
	case F2FS_IOC_PRECACHE_EXTENTS:
	case F2FS_IOC_RESIZE_FS:
	case FS_IOC_ENABLE_VERITY:
	case FS_IOC_MEASURE_VERITY:
	case FS_IOC_READ_VERITY_METADATA:
	case FS_IOC_GETFSLABEL:
	case FS_IOC_SETFSLABEL:
	case F2FS_IOC_GET_COMPRESS_BLOCKS:
	case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
	case F2FS_IOC_SEC_TRIM_FILE:
	case F2FS_IOC_GET_COMPRESS_OPTION:
	case F2FS_IOC_SET_COMPRESS_OPTION:
	case F2FS_IOC_DECOMPRESS_FILE:
	case F2FS_IOC_COMPRESS_FILE:
	case F2FS_IOC_GET_DEV_ALIAS_FILE:
	case F2FS_IOC_IO_PRIO:
		break;
	default:
		return -ENOIOCTLCMD;
	}
	return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif

/*
 * File operations table for f2fs files.  The read/write, splice-read,
 * ioctl and fadvise hooks point at the functions defined above; the compat
 * ioctl hook is only present when CONFIG_COMPAT is enabled.
 */
const struct file_operations f2fs_file_operations = {
	.llseek		= f2fs_llseek,
	.read_iter	= f2fs_file_read_iter,
	.write_iter	= f2fs_file_write_iter,
	.iopoll		= iocb_bio_iopoll,
	.open		= f2fs_file_open,
	.release	= f2fs_release_file,
	.mmap_prepare	= f2fs_file_mmap_prepare,
	.flush		= f2fs_file_flush,
	.fsync		= f2fs_sync_file,
	.fallocate	= f2fs_fallocate,
	.unlocked_ioctl	= f2fs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= f2fs_compat_ioctl,
#endif
	.splice_read	= f2fs_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fadvise	= f2fs_file_fadvise,
	.fop_flags	= FOP_BUFFER_RASYNC,
	.setlease	= generic_setlease,
};