1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * fs/f2fs/file.c 4 * 5 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 6 * http://www.samsung.com/ 7 */ 8 #include <linux/fs.h> 9 #include <linux/f2fs_fs.h> 10 #include <linux/stat.h> 11 #include <linux/writeback.h> 12 #include <linux/blkdev.h> 13 #include <linux/falloc.h> 14 #include <linux/types.h> 15 #include <linux/compat.h> 16 #include <linux/uaccess.h> 17 #include <linux/mount.h> 18 #include <linux/pagevec.h> 19 #include <linux/uio.h> 20 #include <linux/uuid.h> 21 #include <linux/file.h> 22 #include <linux/nls.h> 23 #include <linux/sched/signal.h> 24 #include <linux/fileattr.h> 25 #include <linux/fadvise.h> 26 #include <linux/iomap.h> 27 28 #include "f2fs.h" 29 #include "node.h" 30 #include "segment.h" 31 #include "xattr.h" 32 #include "acl.h" 33 #include "gc.h" 34 #include "iostat.h" 35 #include <trace/events/f2fs.h> 36 #include <uapi/linux/f2fs.h> 37 38 static void f2fs_zero_post_eof_page(struct inode *inode, 39 loff_t new_size, bool lock) 40 { 41 loff_t old_size = i_size_read(inode); 42 43 if (old_size >= new_size) 44 return; 45 46 if (mapping_empty(inode->i_mapping)) 47 return; 48 49 if (lock) 50 filemap_invalidate_lock(inode->i_mapping); 51 /* zero or drop pages only in range of [old_size, new_size] */ 52 truncate_inode_pages_range(inode->i_mapping, old_size, new_size); 53 if (lock) 54 filemap_invalidate_unlock(inode->i_mapping); 55 } 56 57 static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) 58 { 59 struct inode *inode = file_inode(vmf->vma->vm_file); 60 vm_flags_t flags = vmf->vma->vm_flags; 61 vm_fault_t ret; 62 63 ret = filemap_fault(vmf); 64 if (ret & VM_FAULT_LOCKED) 65 f2fs_update_iostat(F2FS_I_SB(inode), inode, 66 APP_MAPPED_READ_IO, F2FS_BLKSIZE); 67 68 trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret); 69 70 return ret; 71 } 72 73 static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) 74 { 75 struct folio *folio = page_folio(vmf->page); 76 struct inode *inode = file_inode(vmf->vma->vm_file); 77 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 78 struct dnode_of_data dn; 79 bool need_alloc = !f2fs_is_pinned_file(inode); 80 int err = 0; 81 vm_fault_t ret; 82 83 if (unlikely(IS_IMMUTABLE(inode))) 84 return VM_FAULT_SIGBUS; 85 86 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 87 err = -EIO; 88 goto out; 89 } 90 91 if (unlikely(f2fs_cp_error(sbi))) { 92 err = -EIO; 93 goto out; 94 } 95 96 if (!f2fs_is_checkpoint_ready(sbi)) { 97 err = -ENOSPC; 98 goto out; 99 } 100 101 err = f2fs_convert_inline_inode(inode); 102 if (err) 103 goto out; 104 105 #ifdef CONFIG_F2FS_FS_COMPRESSION 106 if (f2fs_compressed_file(inode)) { 107 int ret = f2fs_is_compressed_cluster(inode, folio->index); 108 109 if (ret < 0) { 110 err = ret; 111 goto out; 112 } else if (ret) { 113 need_alloc = false; 114 } 115 } 116 #endif 117 /* should do out of any locked page */ 118 if (need_alloc) 119 f2fs_balance_fs(sbi, true); 120 121 sb_start_pagefault(inode->i_sb); 122 123 f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); 124 125 f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT, true); 126 127 file_update_time(vmf->vma->vm_file); 128 filemap_invalidate_lock_shared(inode->i_mapping); 129 130 folio_lock(folio); 131 if (unlikely(folio->mapping != inode->i_mapping || 132 folio_pos(folio) > i_size_read(inode) || 133 !folio_test_uptodate(folio))) { 134 folio_unlock(folio); 135 err = -EFAULT; 136 goto out_sem; 137 } 138 139 set_new_dnode(&dn, inode, NULL, NULL, 0); 140 if (need_alloc) { 141 /* block allocation */ 142 err = 
f2fs_get_block_locked(&dn, folio->index); 143 } else { 144 err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); 145 f2fs_put_dnode(&dn); 146 if (f2fs_is_pinned_file(inode) && 147 !__is_valid_data_blkaddr(dn.data_blkaddr)) 148 err = -EIO; 149 } 150 151 if (err) { 152 folio_unlock(folio); 153 goto out_sem; 154 } 155 156 f2fs_folio_wait_writeback(folio, DATA, false, true); 157 158 /* wait for GCed page writeback via META_MAPPING */ 159 f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); 160 161 /* 162 * check to see if the page is mapped already (no holes) 163 */ 164 if (folio_test_mappedtodisk(folio)) 165 goto out_sem; 166 167 /* page is wholly or partially inside EOF */ 168 if (((loff_t)(folio->index + 1) << PAGE_SHIFT) > 169 i_size_read(inode)) { 170 loff_t offset; 171 172 offset = i_size_read(inode) & ~PAGE_MASK; 173 folio_zero_segment(folio, offset, folio_size(folio)); 174 } 175 folio_mark_dirty(folio); 176 177 f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE); 178 f2fs_update_time(sbi, REQ_TIME); 179 180 out_sem: 181 filemap_invalidate_unlock_shared(inode->i_mapping); 182 183 sb_end_pagefault(inode->i_sb); 184 out: 185 ret = vmf_fs_error(err); 186 187 trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret); 188 return ret; 189 } 190 191 static const struct vm_operations_struct f2fs_file_vm_ops = { 192 .fault = f2fs_filemap_fault, 193 .map_pages = filemap_map_pages, 194 .page_mkwrite = f2fs_vm_page_mkwrite, 195 }; 196 197 static int get_parent_ino(struct inode *inode, nid_t *pino) 198 { 199 struct dentry *dentry; 200 201 /* 202 * Make sure to get the non-deleted alias. The alias associated with 203 * the open file descriptor being fsync()'ed may be deleted already. 204 */ 205 dentry = d_find_alias(inode); 206 if (!dentry) 207 return 0; 208 209 *pino = d_parent_ino(dentry); 210 dput(dentry); 211 return 1; 212 } 213 214 static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) 215 { 216 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 217 enum cp_reason_type cp_reason = CP_NO_NEEDED; 218 219 if (!S_ISREG(inode->i_mode)) 220 cp_reason = CP_NON_REGULAR; 221 else if (f2fs_compressed_file(inode)) 222 cp_reason = CP_COMPRESSED; 223 else if (inode->i_nlink != 1) 224 cp_reason = CP_HARDLINK; 225 else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) 226 cp_reason = CP_SB_NEED_CP; 227 else if (file_wrong_pino(inode)) 228 cp_reason = CP_WRONG_PINO; 229 else if (!f2fs_space_for_roll_forward(sbi)) 230 cp_reason = CP_NO_SPC_ROLL; 231 else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) 232 cp_reason = CP_NODE_NEED_CP; 233 else if (test_opt(sbi, FASTBOOT)) 234 cp_reason = CP_FASTBOOT_MODE; 235 else if (F2FS_OPTION(sbi).active_logs == 2) 236 cp_reason = CP_SPEC_LOG_NUM; 237 else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && 238 f2fs_need_dentry_mark(sbi, inode->i_ino) && 239 f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, 240 TRANS_DIR_INO)) 241 cp_reason = CP_RECOVER_DIR; 242 else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, 243 XATTR_DIR_INO)) 244 cp_reason = CP_XATTR_DIR; 245 246 return cp_reason; 247 } 248 249 static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) 250 { 251 struct folio *i = filemap_get_folio(NODE_MAPPING(sbi), ino); 252 bool ret = false; 253 /* But we need to avoid that there are some inode updates */ 254 if ((!IS_ERR(i) && folio_test_dirty(i)) || 255 f2fs_need_inode_block_update(sbi, ino)) 256 ret = true; 257 f2fs_folio_put(i, false); 258 return ret; 259 } 260 261 static void 
try_to_fix_pino(struct inode *inode) 262 { 263 struct f2fs_inode_info *fi = F2FS_I(inode); 264 nid_t pino; 265 266 f2fs_down_write(&fi->i_sem); 267 if (file_wrong_pino(inode) && inode->i_nlink == 1 && 268 get_parent_ino(inode, &pino)) { 269 f2fs_i_pino_write(inode, pino); 270 file_got_pino(inode); 271 } 272 f2fs_up_write(&fi->i_sem); 273 } 274 275 static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, 276 int datasync, bool atomic) 277 { 278 struct inode *inode = file->f_mapping->host; 279 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 280 nid_t ino = inode->i_ino; 281 int ret = 0; 282 enum cp_reason_type cp_reason = 0; 283 struct writeback_control wbc = { 284 .sync_mode = WB_SYNC_ALL, 285 .nr_to_write = LONG_MAX, 286 }; 287 unsigned int seq_id = 0; 288 289 if (unlikely(f2fs_readonly(inode->i_sb))) 290 return 0; 291 292 trace_f2fs_sync_file_enter(inode); 293 294 if (S_ISDIR(inode->i_mode)) 295 goto go_write; 296 297 /* if fdatasync is triggered, let's do in-place-update */ 298 if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) 299 set_inode_flag(inode, FI_NEED_IPU); 300 ret = file_write_and_wait_range(file, start, end); 301 clear_inode_flag(inode, FI_NEED_IPU); 302 303 if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { 304 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); 305 return ret; 306 } 307 308 /* if the inode is dirty, let's recover all the time */ 309 if (!f2fs_skip_inode_update(inode, datasync)) { 310 f2fs_write_inode(inode, NULL); 311 goto go_write; 312 } 313 314 /* 315 * if there is no written data, don't waste time to write recovery info. 316 */ 317 if (!is_inode_flag_set(inode, FI_APPEND_WRITE) && 318 !f2fs_exist_written_data(sbi, ino, APPEND_INO)) { 319 320 /* it may call write_inode just prior to fsync */ 321 if (need_inode_page_update(sbi, ino)) 322 goto go_write; 323 324 if (is_inode_flag_set(inode, FI_UPDATE_WRITE) || 325 f2fs_exist_written_data(sbi, ino, UPDATE_INO)) 326 goto flush_out; 327 goto out; 328 } else { 329 /* 330 * for OPU case, during fsync(), node can be persisted before 331 * data when lower device doesn't support write barrier, result 332 * in data corruption after SPO. 333 * So for strict fsync mode, force to use atomic write semantics 334 * to keep write order in between data/node and last node to 335 * avoid potential data corruption. 336 */ 337 if (F2FS_OPTION(sbi).fsync_mode == 338 FSYNC_MODE_STRICT && !atomic) 339 atomic = true; 340 } 341 go_write: 342 /* 343 * Both of fdatasync() and fsync() are able to be recovered from 344 * sudden-power-off. 345 */ 346 f2fs_down_read(&F2FS_I(inode)->i_sem); 347 cp_reason = need_do_checkpoint(inode); 348 f2fs_up_read(&F2FS_I(inode)->i_sem); 349 350 if (cp_reason) { 351 /* all the dirty node pages should be flushed for POR */ 352 ret = f2fs_sync_fs(inode->i_sb, 1); 353 354 /* 355 * We've secured consistency through sync_fs. Following pino 356 * will be used only for fsynced inodes after checkpoint. 
357 */ 358 try_to_fix_pino(inode); 359 clear_inode_flag(inode, FI_APPEND_WRITE); 360 clear_inode_flag(inode, FI_UPDATE_WRITE); 361 goto out; 362 } 363 sync_nodes: 364 atomic_inc(&sbi->wb_sync_req[NODE]); 365 ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id); 366 atomic_dec(&sbi->wb_sync_req[NODE]); 367 if (ret) 368 goto out; 369 370 /* if cp_error was enabled, we should avoid infinite loop */ 371 if (unlikely(f2fs_cp_error(sbi))) { 372 ret = -EIO; 373 goto out; 374 } 375 376 if (f2fs_need_inode_block_update(sbi, ino)) { 377 f2fs_mark_inode_dirty_sync(inode, true); 378 f2fs_write_inode(inode, NULL); 379 goto sync_nodes; 380 } 381 382 /* 383 * If it's atomic_write, it's just fine to keep write ordering. So 384 * here we don't need to wait for node write completion, since we use 385 * node chain which serializes node blocks. If one of node writes are 386 * reordered, we can see simply broken chain, resulting in stopping 387 * roll-forward recovery. It means we'll recover all or none node blocks 388 * given fsync mark. 389 */ 390 if (!atomic) { 391 ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id); 392 if (ret) 393 goto out; 394 } 395 396 /* once recovery info is written, don't need to tack this */ 397 f2fs_remove_ino_entry(sbi, ino, APPEND_INO); 398 clear_inode_flag(inode, FI_APPEND_WRITE); 399 flush_out: 400 if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) 401 ret = f2fs_issue_flush(sbi, inode->i_ino); 402 if (!ret) { 403 f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); 404 clear_inode_flag(inode, FI_UPDATE_WRITE); 405 f2fs_remove_ino_entry(sbi, ino, FLUSH_INO); 406 } 407 f2fs_update_time(sbi, REQ_TIME); 408 out: 409 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); 410 return ret; 411 } 412 413 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 414 { 415 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) 416 return -EIO; 417 return f2fs_do_sync_file(file, start, end, datasync, false); 418 } 419 420 static bool __found_offset(struct address_space *mapping, 421 struct dnode_of_data *dn, pgoff_t index, int whence) 422 { 423 block_t blkaddr = f2fs_data_blkaddr(dn); 424 struct inode *inode = mapping->host; 425 bool compressed_cluster = false; 426 427 if (f2fs_compressed_file(inode)) { 428 block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_folio, 429 ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size)); 430 431 compressed_cluster = first_blkaddr == COMPRESS_ADDR; 432 } 433 434 switch (whence) { 435 case SEEK_DATA: 436 if (__is_valid_data_blkaddr(blkaddr)) 437 return true; 438 if (blkaddr == NEW_ADDR && 439 xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY)) 440 return true; 441 if (compressed_cluster) 442 return true; 443 break; 444 case SEEK_HOLE: 445 if (compressed_cluster) 446 return false; 447 if (blkaddr == NULL_ADDR) 448 return true; 449 break; 450 } 451 return false; 452 } 453 454 static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) 455 { 456 struct inode *inode = file->f_mapping->host; 457 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 458 struct dnode_of_data dn; 459 pgoff_t pgofs, end_offset; 460 loff_t data_ofs = offset; 461 loff_t isize; 462 int err = 0; 463 464 inode_lock_shared(inode); 465 466 isize = i_size_read(inode); 467 if (offset >= isize) 468 goto fail; 469 470 /* handle inline data case */ 471 if (f2fs_has_inline_data(inode)) { 472 if (whence == SEEK_HOLE) { 473 data_ofs = isize; 474 goto found; 475 } else if (whence == SEEK_DATA) { 476 
data_ofs = offset; 477 goto found; 478 } 479 } 480 481 pgofs = (pgoff_t)(offset >> PAGE_SHIFT); 482 483 for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) { 484 set_new_dnode(&dn, inode, NULL, NULL, 0); 485 err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); 486 if (err && err != -ENOENT) { 487 goto fail; 488 } else if (err == -ENOENT) { 489 /* direct node does not exists */ 490 if (whence == SEEK_DATA) { 491 pgofs = f2fs_get_next_page_offset(&dn, pgofs); 492 continue; 493 } else { 494 goto found; 495 } 496 } 497 498 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 499 500 /* find data/hole in dnode block */ 501 for (; dn.ofs_in_node < end_offset; 502 dn.ofs_in_node++, pgofs++, 503 data_ofs = (loff_t)pgofs << PAGE_SHIFT) { 504 block_t blkaddr; 505 506 blkaddr = f2fs_data_blkaddr(&dn); 507 508 if (__is_valid_data_blkaddr(blkaddr) && 509 !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), 510 blkaddr, DATA_GENERIC_ENHANCE)) { 511 f2fs_put_dnode(&dn); 512 goto fail; 513 } 514 515 if (__found_offset(file->f_mapping, &dn, 516 pgofs, whence)) { 517 f2fs_put_dnode(&dn); 518 goto found; 519 } 520 } 521 f2fs_put_dnode(&dn); 522 } 523 524 if (whence == SEEK_DATA) 525 goto fail; 526 found: 527 if (whence == SEEK_HOLE && data_ofs > isize) 528 data_ofs = isize; 529 inode_unlock_shared(inode); 530 return vfs_setpos(file, data_ofs, maxbytes); 531 fail: 532 inode_unlock_shared(inode); 533 return -ENXIO; 534 } 535 536 static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) 537 { 538 struct inode *inode = file->f_mapping->host; 539 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 540 541 switch (whence) { 542 case SEEK_SET: 543 case SEEK_CUR: 544 case SEEK_END: 545 return generic_file_llseek_size(file, offset, whence, 546 maxbytes, i_size_read(inode)); 547 case SEEK_DATA: 548 case SEEK_HOLE: 549 if (offset < 0) 550 return -ENXIO; 551 return f2fs_seek_block(file, offset, whence); 552 } 553 554 return -EINVAL; 555 } 556 557 static int f2fs_file_mmap_prepare(struct vm_area_desc *desc) 558 { 559 struct file *file = desc->file; 560 struct inode *inode = file_inode(file); 561 562 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 563 return -EIO; 564 565 if (!f2fs_is_compress_backend_ready(inode)) 566 return -EOPNOTSUPP; 567 568 file_accessed(file); 569 desc->vm_ops = &f2fs_file_vm_ops; 570 571 f2fs_down_read(&F2FS_I(inode)->i_sem); 572 set_inode_flag(inode, FI_MMAP_FILE); 573 f2fs_up_read(&F2FS_I(inode)->i_sem); 574 575 return 0; 576 } 577 578 static int finish_preallocate_blocks(struct inode *inode) 579 { 580 int ret = 0; 581 bool opened; 582 583 f2fs_down_read(&F2FS_I(inode)->i_sem); 584 opened = is_inode_flag_set(inode, FI_OPENED_FILE); 585 f2fs_up_read(&F2FS_I(inode)->i_sem); 586 if (opened) 587 return 0; 588 589 inode_lock(inode); 590 if (is_inode_flag_set(inode, FI_OPENED_FILE)) 591 goto out_unlock; 592 593 if (!file_should_truncate(inode)) 594 goto out_update; 595 596 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 597 filemap_invalidate_lock(inode->i_mapping); 598 599 truncate_setsize(inode, i_size_read(inode)); 600 ret = f2fs_truncate(inode); 601 602 filemap_invalidate_unlock(inode->i_mapping); 603 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 604 if (ret) 605 goto out_unlock; 606 607 file_dont_truncate(inode); 608 out_update: 609 f2fs_down_write(&F2FS_I(inode)->i_sem); 610 set_inode_flag(inode, FI_OPENED_FILE); 611 f2fs_up_write(&F2FS_I(inode)->i_sem); 612 out_unlock: 613 inode_unlock(inode); 614 return ret; 615 } 616 617 static int f2fs_file_open(struct inode 
*inode, struct file *filp) 618 { 619 int err = fscrypt_file_open(inode, filp); 620 621 if (err) 622 return err; 623 624 if (!f2fs_is_compress_backend_ready(inode)) 625 return -EOPNOTSUPP; 626 627 err = fsverity_file_open(inode, filp); 628 if (err) 629 return err; 630 631 filp->f_mode |= FMODE_NOWAIT; 632 filp->f_mode |= FMODE_CAN_ODIRECT; 633 634 err = dquot_file_open(inode, filp); 635 if (err) 636 return err; 637 638 err = finish_preallocate_blocks(inode); 639 if (!err) 640 atomic_inc(&F2FS_I(inode)->open_count); 641 return err; 642 } 643 644 void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) 645 { 646 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 647 int nr_free = 0, ofs = dn->ofs_in_node, len = count; 648 __le32 *addr; 649 bool compressed_cluster = false; 650 int cluster_index = 0, valid_blocks = 0; 651 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 652 bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks); 653 block_t blkstart; 654 int blklen = 0; 655 656 addr = get_dnode_addr(dn->inode, dn->node_folio) + ofs; 657 blkstart = le32_to_cpu(*addr); 658 659 /* Assumption: truncation starts with cluster */ 660 for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { 661 block_t blkaddr = le32_to_cpu(*addr); 662 663 if (f2fs_compressed_file(dn->inode) && 664 !(cluster_index & (cluster_size - 1))) { 665 if (compressed_cluster) 666 f2fs_i_compr_blocks_update(dn->inode, 667 valid_blocks, false); 668 compressed_cluster = (blkaddr == COMPRESS_ADDR); 669 valid_blocks = 0; 670 } 671 672 if (blkaddr == NULL_ADDR) 673 goto next; 674 675 f2fs_set_data_blkaddr(dn, NULL_ADDR); 676 677 if (__is_valid_data_blkaddr(blkaddr)) { 678 if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE)) 679 goto next; 680 if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr, 681 DATA_GENERIC_ENHANCE)) 682 goto next; 683 if (compressed_cluster) 684 valid_blocks++; 685 } 686 687 if (blkstart + blklen == blkaddr) { 688 blklen++; 689 } else { 690 f2fs_invalidate_blocks(sbi, blkstart, blklen); 691 blkstart = blkaddr; 692 blklen = 1; 693 } 694 695 if (!released || blkaddr != COMPRESS_ADDR) 696 nr_free++; 697 698 continue; 699 700 next: 701 if (blklen) 702 f2fs_invalidate_blocks(sbi, blkstart, blklen); 703 704 blkstart = le32_to_cpu(*(addr + 1)); 705 blklen = 0; 706 } 707 708 if (blklen) 709 f2fs_invalidate_blocks(sbi, blkstart, blklen); 710 711 if (compressed_cluster) 712 f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false); 713 714 if (nr_free) { 715 pgoff_t fofs; 716 /* 717 * once we invalidate valid blkaddr in range [ofs, ofs + count], 718 * we will invalidate all blkaddr in the whole range. 
719 */ 720 fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), 721 dn->inode) + ofs; 722 f2fs_update_read_extent_cache_range(dn, fofs, 0, len); 723 f2fs_update_age_extent_cache_range(dn, fofs, len); 724 dec_valid_block_count(sbi, dn->inode, nr_free); 725 } 726 dn->ofs_in_node = ofs; 727 728 f2fs_update_time(sbi, REQ_TIME); 729 trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, 730 dn->ofs_in_node, nr_free); 731 } 732 733 static int truncate_partial_data_page(struct inode *inode, u64 from, 734 bool cache_only) 735 { 736 loff_t offset = from & (PAGE_SIZE - 1); 737 pgoff_t index = from >> PAGE_SHIFT; 738 struct address_space *mapping = inode->i_mapping; 739 struct folio *folio; 740 741 if (!offset && !cache_only) 742 return 0; 743 744 if (cache_only) { 745 folio = filemap_lock_folio(mapping, index); 746 if (IS_ERR(folio)) 747 return 0; 748 if (folio_test_uptodate(folio)) 749 goto truncate_out; 750 f2fs_folio_put(folio, true); 751 return 0; 752 } 753 754 folio = f2fs_get_lock_data_folio(inode, index, true); 755 if (IS_ERR(folio)) 756 return PTR_ERR(folio) == -ENOENT ? 0 : PTR_ERR(folio); 757 truncate_out: 758 f2fs_folio_wait_writeback(folio, DATA, true, true); 759 folio_zero_segment(folio, offset, folio_size(folio)); 760 761 /* An encrypted inode should have a key and truncate the last page. */ 762 f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode)); 763 if (!cache_only) 764 folio_mark_dirty(folio); 765 f2fs_folio_put(folio, true); 766 return 0; 767 } 768 769 int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) 770 { 771 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 772 struct dnode_of_data dn; 773 pgoff_t free_from; 774 int count = 0, err = 0; 775 struct folio *ifolio; 776 bool truncate_page = false; 777 778 trace_f2fs_truncate_blocks_enter(inode, from); 779 780 if (IS_DEVICE_ALIASING(inode) && from) { 781 err = -EINVAL; 782 goto out_err; 783 } 784 785 free_from = (pgoff_t)F2FS_BLK_ALIGN(from); 786 787 if (free_from >= max_file_blocks(inode)) 788 goto free_partial; 789 790 if (lock) 791 f2fs_lock_op(sbi); 792 793 ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); 794 if (IS_ERR(ifolio)) { 795 err = PTR_ERR(ifolio); 796 goto out; 797 } 798 799 if (IS_DEVICE_ALIASING(inode)) { 800 struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; 801 struct extent_info ei = et->largest; 802 803 f2fs_invalidate_blocks(sbi, ei.blk, ei.len); 804 805 dec_valid_block_count(sbi, inode, ei.len); 806 f2fs_update_time(sbi, REQ_TIME); 807 808 f2fs_folio_put(ifolio, true); 809 goto out; 810 } 811 812 if (f2fs_has_inline_data(inode)) { 813 f2fs_truncate_inline_inode(inode, ifolio, from); 814 f2fs_folio_put(ifolio, true); 815 truncate_page = true; 816 goto out; 817 } 818 819 set_new_dnode(&dn, inode, ifolio, NULL, 0); 820 err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); 821 if (err) { 822 if (err == -ENOENT) 823 goto free_next; 824 goto out; 825 } 826 827 count = ADDRS_PER_PAGE(dn.node_folio, inode); 828 829 count -= dn.ofs_in_node; 830 f2fs_bug_on(sbi, count < 0); 831 832 if (dn.ofs_in_node || IS_INODE(dn.node_folio)) { 833 f2fs_truncate_data_blocks_range(&dn, count); 834 free_from += count; 835 } 836 837 f2fs_put_dnode(&dn); 838 free_next: 839 err = f2fs_truncate_inode_blocks(inode, free_from); 840 out: 841 if (lock) 842 f2fs_unlock_op(sbi); 843 free_partial: 844 /* lastly zero out the first data page */ 845 if (!err) 846 err = truncate_partial_data_page(inode, from, truncate_page); 847 out_err: 848 trace_f2fs_truncate_blocks_exit(inode, err); 849 return 
err; 850 } 851 852 int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) 853 { 854 u64 free_from = from; 855 int err; 856 857 #ifdef CONFIG_F2FS_FS_COMPRESSION 858 /* 859 * for compressed file, only support cluster size 860 * aligned truncation. 861 */ 862 if (f2fs_compressed_file(inode)) 863 free_from = round_up(from, 864 F2FS_I(inode)->i_cluster_size << PAGE_SHIFT); 865 #endif 866 867 err = f2fs_do_truncate_blocks(inode, free_from, lock); 868 if (err) 869 return err; 870 871 #ifdef CONFIG_F2FS_FS_COMPRESSION 872 /* 873 * For compressed file, after release compress blocks, don't allow write 874 * direct, but we should allow write direct after truncate to zero. 875 */ 876 if (f2fs_compressed_file(inode) && !free_from 877 && is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) 878 clear_inode_flag(inode, FI_COMPRESS_RELEASED); 879 880 if (from != free_from) { 881 err = f2fs_truncate_partial_cluster(inode, from, lock); 882 if (err) 883 return err; 884 } 885 #endif 886 887 return 0; 888 } 889 890 int f2fs_truncate(struct inode *inode) 891 { 892 int err; 893 894 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 895 return -EIO; 896 897 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 898 S_ISLNK(inode->i_mode))) 899 return 0; 900 901 trace_f2fs_truncate(inode); 902 903 if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) 904 return -EIO; 905 906 err = f2fs_dquot_initialize(inode); 907 if (err) 908 return err; 909 910 /* we should check inline_data size */ 911 if (!f2fs_may_inline_data(inode)) { 912 err = f2fs_convert_inline_inode(inode); 913 if (err) { 914 /* 915 * Always truncate page #0 to avoid page cache 916 * leak in evict() path. 917 */ 918 truncate_inode_pages_range(inode->i_mapping, 919 F2FS_BLK_TO_BYTES(0), 920 F2FS_BLK_END_BYTES(0)); 921 return err; 922 } 923 } 924 925 err = f2fs_truncate_blocks(inode, i_size_read(inode), true); 926 if (err) 927 return err; 928 929 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 930 f2fs_mark_inode_dirty_sync(inode, false); 931 return 0; 932 } 933 934 static bool f2fs_force_buffered_io(struct inode *inode, int rw) 935 { 936 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 937 938 if (!fscrypt_dio_supported(inode)) 939 return true; 940 if (fsverity_active(inode)) 941 return true; 942 if (f2fs_compressed_file(inode)) 943 return true; 944 /* 945 * only force direct read to use buffered IO, for direct write, 946 * it expects inline data conversion before committing IO. 947 */ 948 if (f2fs_has_inline_data(inode) && rw == READ) 949 return true; 950 951 /* disallow direct IO if any of devices has unaligned blksize */ 952 if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) 953 return true; 954 /* 955 * for blkzoned device, fallback direct IO to buffered IO, so 956 * all IOs can be serialized by log-structured write. 
957 */ 958 if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) && 959 !f2fs_is_pinned_file(inode)) 960 return true; 961 if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) 962 return true; 963 964 return false; 965 } 966 967 int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path, 968 struct kstat *stat, u32 request_mask, unsigned int query_flags) 969 { 970 struct inode *inode = d_inode(path->dentry); 971 struct f2fs_inode_info *fi = F2FS_I(inode); 972 struct f2fs_inode *ri = NULL; 973 unsigned int flags; 974 975 if (f2fs_has_extra_attr(inode) && 976 f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && 977 F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { 978 stat->result_mask |= STATX_BTIME; 979 stat->btime.tv_sec = fi->i_crtime.tv_sec; 980 stat->btime.tv_nsec = fi->i_crtime.tv_nsec; 981 } 982 983 /* 984 * Return the DIO alignment restrictions if requested. We only return 985 * this information when requested, since on encrypted files it might 986 * take a fair bit of work to get if the file wasn't opened recently. 987 * 988 * f2fs sometimes supports DIO reads but not DIO writes. STATX_DIOALIGN 989 * cannot represent that, so in that case we report no DIO support. 990 */ 991 if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) { 992 unsigned int bsize = i_blocksize(inode); 993 994 stat->result_mask |= STATX_DIOALIGN; 995 if (!f2fs_force_buffered_io(inode, WRITE)) { 996 stat->dio_mem_align = bsize; 997 stat->dio_offset_align = bsize; 998 } 999 } 1000 1001 flags = fi->i_flags; 1002 if (flags & F2FS_COMPR_FL) 1003 stat->attributes |= STATX_ATTR_COMPRESSED; 1004 if (flags & F2FS_APPEND_FL) 1005 stat->attributes |= STATX_ATTR_APPEND; 1006 if (IS_ENCRYPTED(inode)) 1007 stat->attributes |= STATX_ATTR_ENCRYPTED; 1008 if (flags & F2FS_IMMUTABLE_FL) 1009 stat->attributes |= STATX_ATTR_IMMUTABLE; 1010 if (flags & F2FS_NODUMP_FL) 1011 stat->attributes |= STATX_ATTR_NODUMP; 1012 if (IS_VERITY(inode)) 1013 stat->attributes |= STATX_ATTR_VERITY; 1014 1015 stat->attributes_mask |= (STATX_ATTR_COMPRESSED | 1016 STATX_ATTR_APPEND | 1017 STATX_ATTR_ENCRYPTED | 1018 STATX_ATTR_IMMUTABLE | 1019 STATX_ATTR_NODUMP | 1020 STATX_ATTR_VERITY); 1021 1022 generic_fillattr(idmap, request_mask, inode, stat); 1023 1024 /* we need to show initial sectors used for inline_data/dentries */ 1025 if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) || 1026 f2fs_has_inline_dentry(inode)) 1027 stat->blocks += (stat->size + 511) >> 9; 1028 1029 return 0; 1030 } 1031 1032 #ifdef CONFIG_F2FS_FS_POSIX_ACL 1033 static void __setattr_copy(struct mnt_idmap *idmap, 1034 struct inode *inode, const struct iattr *attr) 1035 { 1036 unsigned int ia_valid = attr->ia_valid; 1037 1038 i_uid_update(idmap, attr, inode); 1039 i_gid_update(idmap, attr, inode); 1040 if (ia_valid & ATTR_ATIME) 1041 inode_set_atime_to_ts(inode, attr->ia_atime); 1042 if (ia_valid & ATTR_MTIME) 1043 inode_set_mtime_to_ts(inode, attr->ia_mtime); 1044 if (ia_valid & ATTR_CTIME) 1045 inode_set_ctime_to_ts(inode, attr->ia_ctime); 1046 if (ia_valid & ATTR_MODE) { 1047 umode_t mode = attr->ia_mode; 1048 1049 if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) 1050 mode &= ~S_ISGID; 1051 set_acl_inode(inode, mode); 1052 } 1053 } 1054 #else 1055 #define __setattr_copy setattr_copy 1056 #endif 1057 1058 int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 1059 struct iattr *attr) 1060 { 1061 struct inode *inode = d_inode(dentry); 1062 struct f2fs_inode_info *fi = F2FS_I(inode); 1063 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1064 
int err; 1065 1066 if (unlikely(f2fs_cp_error(sbi))) 1067 return -EIO; 1068 1069 err = setattr_prepare(idmap, dentry, attr); 1070 if (err) 1071 return err; 1072 1073 err = fscrypt_prepare_setattr(dentry, attr); 1074 if (err) 1075 return err; 1076 1077 err = fsverity_prepare_setattr(dentry, attr); 1078 if (err) 1079 return err; 1080 1081 if (unlikely(IS_IMMUTABLE(inode))) 1082 return -EPERM; 1083 1084 if (unlikely(IS_APPEND(inode) && 1085 (attr->ia_valid & (ATTR_MODE | ATTR_UID | 1086 ATTR_GID | ATTR_TIMES_SET)))) 1087 return -EPERM; 1088 1089 if ((attr->ia_valid & ATTR_SIZE)) { 1090 if (!f2fs_is_compress_backend_ready(inode) || 1091 IS_DEVICE_ALIASING(inode)) 1092 return -EOPNOTSUPP; 1093 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && 1094 !IS_ALIGNED(attr->ia_size, 1095 F2FS_BLK_TO_BYTES(fi->i_cluster_size))) 1096 return -EINVAL; 1097 /* 1098 * To prevent scattered pin block generation, we don't allow 1099 * smaller/equal size unaligned truncation for pinned file. 1100 * We only support overwrite IO to pinned file, so don't 1101 * care about larger size truncation. 1102 */ 1103 if (f2fs_is_pinned_file(inode) && 1104 attr->ia_size <= i_size_read(inode) && 1105 !IS_ALIGNED(attr->ia_size, 1106 F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi)))) 1107 return -EINVAL; 1108 } 1109 1110 if (is_quota_modification(idmap, inode, attr)) { 1111 err = f2fs_dquot_initialize(inode); 1112 if (err) 1113 return err; 1114 } 1115 if (i_uid_needs_update(idmap, attr, inode) || 1116 i_gid_needs_update(idmap, attr, inode)) { 1117 f2fs_lock_op(sbi); 1118 err = dquot_transfer(idmap, inode, attr); 1119 if (err) { 1120 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); 1121 f2fs_unlock_op(sbi); 1122 return err; 1123 } 1124 /* 1125 * update uid/gid under lock_op(), so that dquot and inode can 1126 * be updated atomically. 1127 */ 1128 i_uid_update(idmap, attr, inode); 1129 i_gid_update(idmap, attr, inode); 1130 f2fs_mark_inode_dirty_sync(inode, true); 1131 f2fs_unlock_op(sbi); 1132 } 1133 1134 if (attr->ia_valid & ATTR_SIZE) { 1135 loff_t old_size = i_size_read(inode); 1136 1137 if (attr->ia_size > MAX_INLINE_DATA(inode)) { 1138 /* 1139 * should convert inline inode before i_size_write to 1140 * keep smaller than inline_data size with inline flag. 1141 */ 1142 err = f2fs_convert_inline_inode(inode); 1143 if (err) 1144 return err; 1145 } 1146 1147 /* 1148 * wait for inflight dio, blocks should be removed after 1149 * IO completion. 1150 */ 1151 if (attr->ia_size < old_size) 1152 inode_dio_wait(inode); 1153 1154 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 1155 filemap_invalidate_lock(inode->i_mapping); 1156 1157 if (attr->ia_size > old_size) 1158 f2fs_zero_post_eof_page(inode, attr->ia_size, false); 1159 truncate_setsize(inode, attr->ia_size); 1160 1161 if (attr->ia_size <= old_size) 1162 err = f2fs_truncate(inode); 1163 /* 1164 * do not trim all blocks after i_size if target size is 1165 * larger than i_size. 
1166 */ 1167 filemap_invalidate_unlock(inode->i_mapping); 1168 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 1169 if (err) 1170 return err; 1171 1172 spin_lock(&fi->i_size_lock); 1173 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 1174 fi->last_disk_size = i_size_read(inode); 1175 spin_unlock(&fi->i_size_lock); 1176 } 1177 1178 __setattr_copy(idmap, inode, attr); 1179 1180 if (attr->ia_valid & ATTR_MODE) { 1181 err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode)); 1182 1183 if (is_inode_flag_set(inode, FI_ACL_MODE)) { 1184 if (!err) 1185 inode->i_mode = fi->i_acl_mode; 1186 clear_inode_flag(inode, FI_ACL_MODE); 1187 } 1188 } 1189 1190 /* file size may changed here */ 1191 f2fs_mark_inode_dirty_sync(inode, true); 1192 1193 /* inode change will produce dirty node pages flushed by checkpoint */ 1194 f2fs_balance_fs(sbi, true); 1195 1196 return err; 1197 } 1198 1199 const struct inode_operations f2fs_file_inode_operations = { 1200 .getattr = f2fs_getattr, 1201 .setattr = f2fs_setattr, 1202 .get_inode_acl = f2fs_get_acl, 1203 .set_acl = f2fs_set_acl, 1204 .listxattr = f2fs_listxattr, 1205 .fiemap = f2fs_fiemap, 1206 .fileattr_get = f2fs_fileattr_get, 1207 .fileattr_set = f2fs_fileattr_set, 1208 }; 1209 1210 static int fill_zero(struct inode *inode, pgoff_t index, 1211 loff_t start, loff_t len) 1212 { 1213 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1214 struct folio *folio; 1215 1216 if (!len) 1217 return 0; 1218 1219 f2fs_balance_fs(sbi, true); 1220 1221 f2fs_lock_op(sbi); 1222 folio = f2fs_get_new_data_folio(inode, NULL, index, false); 1223 f2fs_unlock_op(sbi); 1224 1225 if (IS_ERR(folio)) 1226 return PTR_ERR(folio); 1227 1228 f2fs_folio_wait_writeback(folio, DATA, true, true); 1229 folio_zero_range(folio, start, len); 1230 folio_mark_dirty(folio); 1231 f2fs_folio_put(folio, true); 1232 return 0; 1233 } 1234 1235 int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) 1236 { 1237 int err; 1238 1239 while (pg_start < pg_end) { 1240 struct dnode_of_data dn; 1241 pgoff_t end_offset, count; 1242 1243 set_new_dnode(&dn, inode, NULL, NULL, 0); 1244 err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); 1245 if (err) { 1246 if (err == -ENOENT) { 1247 pg_start = f2fs_get_next_page_offset(&dn, 1248 pg_start); 1249 continue; 1250 } 1251 return err; 1252 } 1253 1254 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 1255 count = min(end_offset - dn.ofs_in_node, pg_end - pg_start); 1256 1257 f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); 1258 1259 f2fs_truncate_data_blocks_range(&dn, count); 1260 f2fs_put_dnode(&dn); 1261 1262 pg_start += count; 1263 } 1264 return 0; 1265 } 1266 1267 static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len) 1268 { 1269 pgoff_t pg_start, pg_end; 1270 loff_t off_start, off_end; 1271 int ret; 1272 1273 ret = f2fs_convert_inline_inode(inode); 1274 if (ret) 1275 return ret; 1276 1277 f2fs_zero_post_eof_page(inode, offset + len, true); 1278 1279 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; 1280 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; 1281 1282 off_start = offset & (PAGE_SIZE - 1); 1283 off_end = (offset + len) & (PAGE_SIZE - 1); 1284 1285 if (pg_start == pg_end) { 1286 ret = fill_zero(inode, pg_start, off_start, 1287 off_end - off_start); 1288 if (ret) 1289 return ret; 1290 } else { 1291 if (off_start) { 1292 ret = fill_zero(inode, pg_start++, off_start, 1293 PAGE_SIZE - off_start); 1294 if (ret) 1295 return ret; 1296 } 1297 if (off_end) { 1298 ret = fill_zero(inode, 
pg_end, 0, off_end); 1299 if (ret) 1300 return ret; 1301 } 1302 1303 if (pg_start < pg_end) { 1304 loff_t blk_start, blk_end; 1305 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1306 1307 f2fs_balance_fs(sbi, true); 1308 1309 blk_start = (loff_t)pg_start << PAGE_SHIFT; 1310 blk_end = (loff_t)pg_end << PAGE_SHIFT; 1311 1312 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1313 filemap_invalidate_lock(inode->i_mapping); 1314 1315 truncate_pagecache_range(inode, blk_start, blk_end - 1); 1316 1317 f2fs_lock_op(sbi); 1318 ret = f2fs_truncate_hole(inode, pg_start, pg_end); 1319 f2fs_unlock_op(sbi); 1320 1321 filemap_invalidate_unlock(inode->i_mapping); 1322 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1323 } 1324 } 1325 1326 return ret; 1327 } 1328 1329 static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, 1330 int *do_replace, pgoff_t off, pgoff_t len) 1331 { 1332 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1333 struct dnode_of_data dn; 1334 int ret, done, i; 1335 1336 next_dnode: 1337 set_new_dnode(&dn, inode, NULL, NULL, 0); 1338 ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); 1339 if (ret && ret != -ENOENT) { 1340 return ret; 1341 } else if (ret == -ENOENT) { 1342 if (dn.max_level == 0) 1343 return -ENOENT; 1344 done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - 1345 dn.ofs_in_node, len); 1346 blkaddr += done; 1347 do_replace += done; 1348 goto next; 1349 } 1350 1351 done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) - 1352 dn.ofs_in_node, len); 1353 for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { 1354 *blkaddr = f2fs_data_blkaddr(&dn); 1355 1356 if (__is_valid_data_blkaddr(*blkaddr) && 1357 !f2fs_is_valid_blkaddr(sbi, *blkaddr, 1358 DATA_GENERIC_ENHANCE)) { 1359 f2fs_put_dnode(&dn); 1360 return -EFSCORRUPTED; 1361 } 1362 1363 if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { 1364 1365 if (f2fs_lfs_mode(sbi)) { 1366 f2fs_put_dnode(&dn); 1367 return -EOPNOTSUPP; 1368 } 1369 1370 /* do not invalidate this block address */ 1371 f2fs_update_data_blkaddr(&dn, NULL_ADDR); 1372 *do_replace = 1; 1373 } 1374 } 1375 f2fs_put_dnode(&dn); 1376 next: 1377 len -= done; 1378 off += done; 1379 if (len) 1380 goto next_dnode; 1381 return 0; 1382 } 1383 1384 static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, 1385 int *do_replace, pgoff_t off, int len) 1386 { 1387 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1388 struct dnode_of_data dn; 1389 int ret, i; 1390 1391 for (i = 0; i < len; i++, do_replace++, blkaddr++) { 1392 if (*do_replace == 0) 1393 continue; 1394 1395 set_new_dnode(&dn, inode, NULL, NULL, 0); 1396 ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); 1397 if (ret) { 1398 dec_valid_block_count(sbi, inode, 1); 1399 f2fs_invalidate_blocks(sbi, *blkaddr, 1); 1400 } else { 1401 f2fs_update_data_blkaddr(&dn, *blkaddr); 1402 } 1403 f2fs_put_dnode(&dn); 1404 } 1405 return 0; 1406 } 1407 1408 static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, 1409 block_t *blkaddr, int *do_replace, 1410 pgoff_t src, pgoff_t dst, pgoff_t len, bool full) 1411 { 1412 struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode); 1413 pgoff_t i = 0; 1414 int ret; 1415 1416 while (i < len) { 1417 if (blkaddr[i] == NULL_ADDR && !full) { 1418 i++; 1419 continue; 1420 } 1421 1422 if (do_replace[i] || blkaddr[i] == NULL_ADDR) { 1423 struct dnode_of_data dn; 1424 struct node_info ni; 1425 size_t new_size; 1426 pgoff_t ilen; 1427 1428 set_new_dnode(&dn, dst_inode, NULL, NULL, 0); 1429 ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE); 
1430 if (ret) 1431 return ret; 1432 1433 ret = f2fs_get_node_info(sbi, dn.nid, &ni, false); 1434 if (ret) { 1435 f2fs_put_dnode(&dn); 1436 return ret; 1437 } 1438 1439 ilen = min((pgoff_t) 1440 ADDRS_PER_PAGE(dn.node_folio, dst_inode) - 1441 dn.ofs_in_node, len - i); 1442 do { 1443 dn.data_blkaddr = f2fs_data_blkaddr(&dn); 1444 f2fs_truncate_data_blocks_range(&dn, 1); 1445 1446 if (do_replace[i]) { 1447 f2fs_i_blocks_write(src_inode, 1448 1, false, false); 1449 f2fs_i_blocks_write(dst_inode, 1450 1, true, false); 1451 f2fs_replace_block(sbi, &dn, dn.data_blkaddr, 1452 blkaddr[i], ni.version, true, false); 1453 1454 do_replace[i] = 0; 1455 } 1456 dn.ofs_in_node++; 1457 i++; 1458 new_size = (loff_t)(dst + i) << PAGE_SHIFT; 1459 if (dst_inode->i_size < new_size) 1460 f2fs_i_size_write(dst_inode, new_size); 1461 } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR)); 1462 1463 f2fs_put_dnode(&dn); 1464 } else { 1465 struct folio *fsrc, *fdst; 1466 1467 fsrc = f2fs_get_lock_data_folio(src_inode, 1468 src + i, true); 1469 if (IS_ERR(fsrc)) 1470 return PTR_ERR(fsrc); 1471 fdst = f2fs_get_new_data_folio(dst_inode, NULL, dst + i, 1472 true); 1473 if (IS_ERR(fdst)) { 1474 f2fs_folio_put(fsrc, true); 1475 return PTR_ERR(fdst); 1476 } 1477 1478 f2fs_folio_wait_writeback(fdst, DATA, true, true); 1479 1480 memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE); 1481 folio_mark_dirty(fdst); 1482 folio_set_f2fs_gcing(fdst); 1483 f2fs_folio_put(fdst, true); 1484 f2fs_folio_put(fsrc, true); 1485 1486 ret = f2fs_truncate_hole(src_inode, 1487 src + i, src + i + 1); 1488 if (ret) 1489 return ret; 1490 i++; 1491 } 1492 } 1493 return 0; 1494 } 1495 1496 static int __exchange_data_block(struct inode *src_inode, 1497 struct inode *dst_inode, pgoff_t src, pgoff_t dst, 1498 pgoff_t len, bool full) 1499 { 1500 block_t *src_blkaddr; 1501 int *do_replace; 1502 pgoff_t olen; 1503 int ret; 1504 1505 while (len) { 1506 olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len); 1507 1508 src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), 1509 array_size(olen, sizeof(block_t)), 1510 GFP_NOFS); 1511 if (!src_blkaddr) 1512 return -ENOMEM; 1513 1514 do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), 1515 array_size(olen, sizeof(int)), 1516 GFP_NOFS); 1517 if (!do_replace) { 1518 kvfree(src_blkaddr); 1519 return -ENOMEM; 1520 } 1521 1522 ret = __read_out_blkaddrs(src_inode, src_blkaddr, 1523 do_replace, src, olen); 1524 if (ret) 1525 goto roll_back; 1526 1527 ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr, 1528 do_replace, src, dst, olen, full); 1529 if (ret) 1530 goto roll_back; 1531 1532 src += olen; 1533 dst += olen; 1534 len -= olen; 1535 1536 kvfree(src_blkaddr); 1537 kvfree(do_replace); 1538 } 1539 return 0; 1540 1541 roll_back: 1542 __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen); 1543 kvfree(src_blkaddr); 1544 kvfree(do_replace); 1545 return ret; 1546 } 1547 1548 static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) 1549 { 1550 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1551 pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 1552 pgoff_t start = offset >> PAGE_SHIFT; 1553 pgoff_t end = (offset + len) >> PAGE_SHIFT; 1554 int ret; 1555 1556 f2fs_balance_fs(sbi, true); 1557 1558 /* avoid gc operation during block exchange */ 1559 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1560 filemap_invalidate_lock(inode->i_mapping); 1561 1562 f2fs_zero_post_eof_page(inode, offset + len, false); 1563 1564 f2fs_lock_op(sbi); 1565 f2fs_drop_extent_tree(inode); 1566 
truncate_pagecache(inode, offset); 1567 ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); 1568 f2fs_unlock_op(sbi); 1569 1570 filemap_invalidate_unlock(inode->i_mapping); 1571 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1572 return ret; 1573 } 1574 1575 static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) 1576 { 1577 loff_t new_size; 1578 int ret; 1579 1580 if (offset + len >= i_size_read(inode)) 1581 return -EINVAL; 1582 1583 /* collapse range should be aligned to block size of f2fs. */ 1584 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) 1585 return -EINVAL; 1586 1587 ret = f2fs_convert_inline_inode(inode); 1588 if (ret) 1589 return ret; 1590 1591 /* write out all dirty pages from offset */ 1592 ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); 1593 if (ret) 1594 return ret; 1595 1596 ret = f2fs_do_collapse(inode, offset, len); 1597 if (ret) 1598 return ret; 1599 1600 /* write out all moved pages, if possible */ 1601 filemap_invalidate_lock(inode->i_mapping); 1602 filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); 1603 truncate_pagecache(inode, offset); 1604 1605 new_size = i_size_read(inode) - len; 1606 ret = f2fs_truncate_blocks(inode, new_size, true); 1607 filemap_invalidate_unlock(inode->i_mapping); 1608 if (!ret) 1609 f2fs_i_size_write(inode, new_size); 1610 return ret; 1611 } 1612 1613 static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, 1614 pgoff_t end) 1615 { 1616 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 1617 pgoff_t index = start; 1618 unsigned int ofs_in_node = dn->ofs_in_node; 1619 blkcnt_t count = 0; 1620 int ret; 1621 1622 for (; index < end; index++, dn->ofs_in_node++) { 1623 if (f2fs_data_blkaddr(dn) == NULL_ADDR) 1624 count++; 1625 } 1626 1627 dn->ofs_in_node = ofs_in_node; 1628 ret = f2fs_reserve_new_blocks(dn, count); 1629 if (ret) 1630 return ret; 1631 1632 dn->ofs_in_node = ofs_in_node; 1633 for (index = start; index < end; index++, dn->ofs_in_node++) { 1634 dn->data_blkaddr = f2fs_data_blkaddr(dn); 1635 /* 1636 * f2fs_reserve_new_blocks will not guarantee entire block 1637 * allocation. 
1638 */ 1639 if (dn->data_blkaddr == NULL_ADDR) { 1640 ret = -ENOSPC; 1641 break; 1642 } 1643 1644 if (dn->data_blkaddr == NEW_ADDR) 1645 continue; 1646 1647 if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr, 1648 DATA_GENERIC_ENHANCE)) { 1649 ret = -EFSCORRUPTED; 1650 break; 1651 } 1652 1653 f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1); 1654 f2fs_set_data_blkaddr(dn, NEW_ADDR); 1655 } 1656 1657 if (index > start) { 1658 f2fs_update_read_extent_cache_range(dn, start, 0, 1659 index - start); 1660 f2fs_update_age_extent_cache_range(dn, start, index - start); 1661 } 1662 1663 return ret; 1664 } 1665 1666 static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, 1667 int mode) 1668 { 1669 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1670 struct address_space *mapping = inode->i_mapping; 1671 pgoff_t index, pg_start, pg_end; 1672 loff_t new_size = i_size_read(inode); 1673 loff_t off_start, off_end; 1674 int ret = 0; 1675 1676 ret = inode_newsize_ok(inode, (len + offset)); 1677 if (ret) 1678 return ret; 1679 1680 ret = f2fs_convert_inline_inode(inode); 1681 if (ret) 1682 return ret; 1683 1684 ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); 1685 if (ret) 1686 return ret; 1687 1688 f2fs_zero_post_eof_page(inode, offset + len, true); 1689 1690 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; 1691 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; 1692 1693 off_start = offset & (PAGE_SIZE - 1); 1694 off_end = (offset + len) & (PAGE_SIZE - 1); 1695 1696 if (pg_start == pg_end) { 1697 ret = fill_zero(inode, pg_start, off_start, 1698 off_end - off_start); 1699 if (ret) 1700 return ret; 1701 1702 new_size = max_t(loff_t, new_size, offset + len); 1703 } else { 1704 if (off_start) { 1705 ret = fill_zero(inode, pg_start++, off_start, 1706 PAGE_SIZE - off_start); 1707 if (ret) 1708 return ret; 1709 1710 new_size = max_t(loff_t, new_size, 1711 (loff_t)pg_start << PAGE_SHIFT); 1712 } 1713 1714 for (index = pg_start; index < pg_end;) { 1715 struct dnode_of_data dn; 1716 unsigned int end_offset; 1717 pgoff_t end; 1718 1719 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1720 filemap_invalidate_lock(mapping); 1721 1722 truncate_pagecache_range(inode, 1723 (loff_t)index << PAGE_SHIFT, 1724 ((loff_t)pg_end << PAGE_SHIFT) - 1); 1725 1726 f2fs_lock_op(sbi); 1727 1728 set_new_dnode(&dn, inode, NULL, NULL, 0); 1729 ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); 1730 if (ret) { 1731 f2fs_unlock_op(sbi); 1732 filemap_invalidate_unlock(mapping); 1733 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1734 goto out; 1735 } 1736 1737 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 1738 end = min(pg_end, end_offset - dn.ofs_in_node + index); 1739 1740 ret = f2fs_do_zero_range(&dn, index, end); 1741 f2fs_put_dnode(&dn); 1742 1743 f2fs_unlock_op(sbi); 1744 filemap_invalidate_unlock(mapping); 1745 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1746 1747 f2fs_balance_fs(sbi, dn.node_changed); 1748 1749 if (ret) 1750 goto out; 1751 1752 index = end; 1753 new_size = max_t(loff_t, new_size, 1754 (loff_t)index << PAGE_SHIFT); 1755 } 1756 1757 if (off_end) { 1758 ret = fill_zero(inode, pg_end, 0, off_end); 1759 if (ret) 1760 goto out; 1761 1762 new_size = max_t(loff_t, new_size, offset + len); 1763 } 1764 } 1765 1766 out: 1767 if (new_size > i_size_read(inode)) { 1768 if (mode & FALLOC_FL_KEEP_SIZE) 1769 file_set_keep_isize(inode); 1770 else 1771 f2fs_i_size_write(inode, new_size); 1772 } 1773 return ret; 1774 } 1775 1776 static int f2fs_insert_range(struct 
inode *inode, loff_t offset, loff_t len) 1777 { 1778 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1779 struct address_space *mapping = inode->i_mapping; 1780 pgoff_t nr, pg_start, pg_end, delta, idx; 1781 loff_t new_size; 1782 int ret = 0; 1783 1784 new_size = i_size_read(inode) + len; 1785 ret = inode_newsize_ok(inode, new_size); 1786 if (ret) 1787 return ret; 1788 1789 if (offset >= i_size_read(inode)) 1790 return -EINVAL; 1791 1792 /* insert range should be aligned to block size of f2fs. */ 1793 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) 1794 return -EINVAL; 1795 1796 ret = f2fs_convert_inline_inode(inode); 1797 if (ret) 1798 return ret; 1799 1800 f2fs_balance_fs(sbi, true); 1801 1802 filemap_invalidate_lock(mapping); 1803 ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); 1804 filemap_invalidate_unlock(mapping); 1805 if (ret) 1806 return ret; 1807 1808 /* write out all dirty pages from offset */ 1809 ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); 1810 if (ret) 1811 return ret; 1812 1813 pg_start = offset >> PAGE_SHIFT; 1814 pg_end = (offset + len) >> PAGE_SHIFT; 1815 delta = pg_end - pg_start; 1816 idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 1817 1818 /* avoid gc operation during block exchange */ 1819 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1820 filemap_invalidate_lock(mapping); 1821 1822 f2fs_zero_post_eof_page(inode, offset + len, false); 1823 truncate_pagecache(inode, offset); 1824 1825 while (!ret && idx > pg_start) { 1826 nr = idx - pg_start; 1827 if (nr > delta) 1828 nr = delta; 1829 idx -= nr; 1830 1831 f2fs_lock_op(sbi); 1832 f2fs_drop_extent_tree(inode); 1833 1834 ret = __exchange_data_block(inode, inode, idx, 1835 idx + delta, nr, false); 1836 f2fs_unlock_op(sbi); 1837 } 1838 filemap_invalidate_unlock(mapping); 1839 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1840 if (ret) 1841 return ret; 1842 1843 /* write out all moved pages, if possible */ 1844 filemap_invalidate_lock(mapping); 1845 ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); 1846 truncate_pagecache(inode, offset); 1847 filemap_invalidate_unlock(mapping); 1848 1849 if (!ret) 1850 f2fs_i_size_write(inode, new_size); 1851 return ret; 1852 } 1853 1854 static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, 1855 loff_t len, int mode) 1856 { 1857 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1858 struct f2fs_map_blocks map = { .m_next_pgofs = NULL, 1859 .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, 1860 .m_may_create = true }; 1861 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, 1862 .init_gc_type = FG_GC, 1863 .should_migrate_blocks = false, 1864 .err_gc_skipped = true, 1865 .nr_free_secs = 0 }; 1866 pgoff_t pg_start, pg_end; 1867 loff_t new_size; 1868 loff_t off_end; 1869 block_t expanded = 0; 1870 int err; 1871 1872 err = inode_newsize_ok(inode, (len + offset)); 1873 if (err) 1874 return err; 1875 1876 err = f2fs_convert_inline_inode(inode); 1877 if (err) 1878 return err; 1879 1880 f2fs_zero_post_eof_page(inode, offset + len, true); 1881 1882 f2fs_balance_fs(sbi, true); 1883 1884 pg_start = ((unsigned long long)offset) >> PAGE_SHIFT; 1885 pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT; 1886 off_end = (offset + len) & (PAGE_SIZE - 1); 1887 1888 map.m_lblk = pg_start; 1889 map.m_len = pg_end - pg_start; 1890 if (off_end) 1891 map.m_len++; 1892 1893 if (!map.m_len) 1894 return 0; 1895 1896 if (f2fs_is_pinned_file(inode)) { 1897 block_t sec_blks = CAP_BLKS_PER_SEC(sbi); 1898 block_t sec_len 
= roundup(map.m_len, sec_blks); 1899 1900 map.m_len = sec_blks; 1901 next_alloc: 1902 f2fs_down_write(&sbi->pin_sem); 1903 1904 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 1905 if (has_not_enough_free_secs(sbi, 0, 0)) { 1906 f2fs_up_write(&sbi->pin_sem); 1907 err = -ENOSPC; 1908 f2fs_warn_ratelimited(sbi, 1909 "ino:%lu, start:%lu, end:%lu, need to trigger GC to " 1910 "reclaim enough free segment when checkpoint is enabled", 1911 inode->i_ino, pg_start, pg_end); 1912 goto out_err; 1913 } 1914 } 1915 1916 if (has_not_enough_free_secs(sbi, 0, 1917 sbi->reserved_pin_section)) { 1918 f2fs_down_write(&sbi->gc_lock); 1919 stat_inc_gc_call_count(sbi, FOREGROUND); 1920 err = f2fs_gc(sbi, &gc_control); 1921 if (err && err != -ENODATA) { 1922 f2fs_up_write(&sbi->pin_sem); 1923 goto out_err; 1924 } 1925 } 1926 1927 err = f2fs_allocate_pinning_section(sbi); 1928 if (err) { 1929 f2fs_up_write(&sbi->pin_sem); 1930 goto out_err; 1931 } 1932 1933 map.m_seg_type = CURSEG_COLD_DATA_PINNED; 1934 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); 1935 file_dont_truncate(inode); 1936 1937 f2fs_up_write(&sbi->pin_sem); 1938 1939 expanded += map.m_len; 1940 sec_len -= map.m_len; 1941 map.m_lblk += map.m_len; 1942 if (!err && sec_len) 1943 goto next_alloc; 1944 1945 map.m_len = expanded; 1946 } else { 1947 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO); 1948 expanded = map.m_len; 1949 } 1950 out_err: 1951 if (err) { 1952 pgoff_t last_off; 1953 1954 if (!expanded) 1955 return err; 1956 1957 last_off = pg_start + expanded - 1; 1958 1959 /* update new size to the failed position */ 1960 new_size = (last_off == pg_end) ? offset + len : 1961 (loff_t)(last_off + 1) << PAGE_SHIFT; 1962 } else { 1963 new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; 1964 } 1965 1966 if (new_size > i_size_read(inode)) { 1967 if (mode & FALLOC_FL_KEEP_SIZE) 1968 file_set_keep_isize(inode); 1969 else 1970 f2fs_i_size_write(inode, new_size); 1971 } 1972 1973 return err; 1974 } 1975 1976 static long f2fs_fallocate(struct file *file, int mode, 1977 loff_t offset, loff_t len) 1978 { 1979 struct inode *inode = file_inode(file); 1980 long ret = 0; 1981 1982 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 1983 return -EIO; 1984 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) 1985 return -ENOSPC; 1986 if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) 1987 return -EOPNOTSUPP; 1988 1989 /* f2fs only support ->fallocate for regular file */ 1990 if (!S_ISREG(inode->i_mode)) 1991 return -EINVAL; 1992 1993 if (IS_ENCRYPTED(inode) && 1994 (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) 1995 return -EOPNOTSUPP; 1996 1997 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 1998 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | 1999 FALLOC_FL_INSERT_RANGE)) 2000 return -EOPNOTSUPP; 2001 2002 inode_lock(inode); 2003 2004 /* 2005 * Pinned file should not support partial truncation since the block 2006 * can be used by applications. 2007 */ 2008 if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && 2009 (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | 2010 FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) { 2011 ret = -EOPNOTSUPP; 2012 goto out; 2013 } 2014 2015 ret = file_modified(file); 2016 if (ret) 2017 goto out; 2018 2019 /* 2020 * wait for inflight dio, blocks should be removed after IO 2021 * completion. 
2022 */ 2023 inode_dio_wait(inode); 2024 2025 if (mode & FALLOC_FL_PUNCH_HOLE) { 2026 if (offset >= inode->i_size) 2027 goto out; 2028 2029 ret = f2fs_punch_hole(inode, offset, len); 2030 } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { 2031 ret = f2fs_collapse_range(inode, offset, len); 2032 } else if (mode & FALLOC_FL_ZERO_RANGE) { 2033 ret = f2fs_zero_range(inode, offset, len, mode); 2034 } else if (mode & FALLOC_FL_INSERT_RANGE) { 2035 ret = f2fs_insert_range(inode, offset, len); 2036 } else { 2037 ret = f2fs_expand_inode_data(inode, offset, len, mode); 2038 } 2039 2040 if (!ret) { 2041 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 2042 f2fs_mark_inode_dirty_sync(inode, false); 2043 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2044 } 2045 2046 out: 2047 inode_unlock(inode); 2048 2049 trace_f2fs_fallocate(inode, mode, offset, len, ret); 2050 return ret; 2051 } 2052 2053 static int f2fs_release_file(struct inode *inode, struct file *filp) 2054 { 2055 if (atomic_dec_and_test(&F2FS_I(inode)->open_count)) 2056 f2fs_remove_donate_inode(inode); 2057 2058 /* 2059 * f2fs_release_file is called at every close calls. So we should 2060 * not drop any inmemory pages by close called by other process. 2061 */ 2062 if (!(filp->f_mode & FMODE_WRITE) || 2063 atomic_read(&inode->i_writecount) != 1) 2064 return 0; 2065 2066 inode_lock(inode); 2067 f2fs_abort_atomic_write(inode, true); 2068 inode_unlock(inode); 2069 2070 return 0; 2071 } 2072 2073 static int f2fs_file_flush(struct file *file, fl_owner_t id) 2074 { 2075 struct inode *inode = file_inode(file); 2076 2077 /* 2078 * If the process doing a transaction is crashed, we should do 2079 * roll-back. Otherwise, other reader/write can see corrupted database 2080 * until all the writers close its file. Since this should be done 2081 * before dropping file lock, it needs to do in ->flush. 2082 */ 2083 if (F2FS_I(inode)->atomic_write_task == current && 2084 (current->flags & PF_EXITING)) { 2085 inode_lock(inode); 2086 f2fs_abort_atomic_write(inode, true); 2087 inode_unlock(inode); 2088 } 2089 2090 return 0; 2091 } 2092 2093 static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) 2094 { 2095 struct f2fs_inode_info *fi = F2FS_I(inode); 2096 u32 masked_flags = fi->i_flags & mask; 2097 2098 /* mask can be shrunk by flags_valid selector */ 2099 iflags &= mask; 2100 2101 /* Is it quota file? 
Do not allow user to mess with it */ 2102 if (IS_NOQUOTA(inode)) 2103 return -EPERM; 2104 2105 if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) { 2106 if (!f2fs_sb_has_casefold(F2FS_I_SB(inode))) 2107 return -EOPNOTSUPP; 2108 if (!f2fs_empty_dir(inode)) 2109 return -ENOTEMPTY; 2110 } 2111 2112 if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) { 2113 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 2114 return -EOPNOTSUPP; 2115 if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL)) 2116 return -EINVAL; 2117 } 2118 2119 if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { 2120 if (masked_flags & F2FS_COMPR_FL) { 2121 if (!f2fs_disable_compressed_file(inode)) 2122 return -EINVAL; 2123 } else { 2124 /* try to convert inline_data to support compression */ 2125 int err = f2fs_convert_inline_inode(inode); 2126 if (err) 2127 return err; 2128 2129 f2fs_down_write(&fi->i_sem); 2130 if (!f2fs_may_compress(inode) || 2131 atomic_read(&fi->writeback) || 2132 (S_ISREG(inode->i_mode) && 2133 F2FS_HAS_BLOCKS(inode))) { 2134 f2fs_up_write(&fi->i_sem); 2135 return -EINVAL; 2136 } 2137 err = set_compress_context(inode); 2138 f2fs_up_write(&fi->i_sem); 2139 2140 if (err) 2141 return err; 2142 } 2143 } 2144 2145 fi->i_flags = iflags | (fi->i_flags & ~mask); 2146 f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) && 2147 (fi->i_flags & F2FS_NOCOMP_FL)); 2148 2149 if (fi->i_flags & F2FS_PROJINHERIT_FL) 2150 set_inode_flag(inode, FI_PROJ_INHERIT); 2151 else 2152 clear_inode_flag(inode, FI_PROJ_INHERIT); 2153 2154 inode_set_ctime_current(inode); 2155 f2fs_set_inode_flags(inode); 2156 f2fs_mark_inode_dirty_sync(inode, true); 2157 return 0; 2158 } 2159 2160 /* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */ 2161 2162 /* 2163 * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry 2164 * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to 2165 * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add 2166 * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL. 2167 * 2168 * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and 2169 * FS_IOC_FSSETXATTR is done by the VFS. 
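 * For example, FS_ENCRYPT_FL and FS_VERITY_FL appear only in F2FS_GETTABLE_FS_FL below; they are reported from inode state in f2fs_fileattr_get() rather than mapped through f2fs_fsflags_map[], so they cannot be set via this interface.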
2170 */ 2171 2172 static const struct { 2173 u32 iflag; 2174 u32 fsflag; 2175 } f2fs_fsflags_map[] = { 2176 { F2FS_COMPR_FL, FS_COMPR_FL }, 2177 { F2FS_SYNC_FL, FS_SYNC_FL }, 2178 { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, 2179 { F2FS_APPEND_FL, FS_APPEND_FL }, 2180 { F2FS_NODUMP_FL, FS_NODUMP_FL }, 2181 { F2FS_NOATIME_FL, FS_NOATIME_FL }, 2182 { F2FS_NOCOMP_FL, FS_NOCOMP_FL }, 2183 { F2FS_INDEX_FL, FS_INDEX_FL }, 2184 { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, 2185 { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, 2186 { F2FS_CASEFOLD_FL, FS_CASEFOLD_FL }, 2187 }; 2188 2189 #define F2FS_GETTABLE_FS_FL ( \ 2190 FS_COMPR_FL | \ 2191 FS_SYNC_FL | \ 2192 FS_IMMUTABLE_FL | \ 2193 FS_APPEND_FL | \ 2194 FS_NODUMP_FL | \ 2195 FS_NOATIME_FL | \ 2196 FS_NOCOMP_FL | \ 2197 FS_INDEX_FL | \ 2198 FS_DIRSYNC_FL | \ 2199 FS_PROJINHERIT_FL | \ 2200 FS_ENCRYPT_FL | \ 2201 FS_INLINE_DATA_FL | \ 2202 FS_NOCOW_FL | \ 2203 FS_VERITY_FL | \ 2204 FS_CASEFOLD_FL) 2205 2206 #define F2FS_SETTABLE_FS_FL ( \ 2207 FS_COMPR_FL | \ 2208 FS_SYNC_FL | \ 2209 FS_IMMUTABLE_FL | \ 2210 FS_APPEND_FL | \ 2211 FS_NODUMP_FL | \ 2212 FS_NOATIME_FL | \ 2213 FS_NOCOMP_FL | \ 2214 FS_DIRSYNC_FL | \ 2215 FS_PROJINHERIT_FL | \ 2216 FS_CASEFOLD_FL) 2217 2218 /* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */ 2219 static inline u32 f2fs_iflags_to_fsflags(u32 iflags) 2220 { 2221 u32 fsflags = 0; 2222 int i; 2223 2224 for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) 2225 if (iflags & f2fs_fsflags_map[i].iflag) 2226 fsflags |= f2fs_fsflags_map[i].fsflag; 2227 2228 return fsflags; 2229 } 2230 2231 /* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */ 2232 static inline u32 f2fs_fsflags_to_iflags(u32 fsflags) 2233 { 2234 u32 iflags = 0; 2235 int i; 2236 2237 for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) 2238 if (fsflags & f2fs_fsflags_map[i].fsflag) 2239 iflags |= f2fs_fsflags_map[i].iflag; 2240 2241 return iflags; 2242 } 2243 2244 static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) 2245 { 2246 struct inode *inode = file_inode(filp); 2247 2248 return put_user(inode->i_generation, (int __user *)arg); 2249 } 2250 2251 static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) 2252 { 2253 struct inode *inode = file_inode(filp); 2254 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2255 struct f2fs_inode_info *fi = F2FS_I(inode); 2256 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2257 loff_t isize; 2258 int ret; 2259 2260 if (!(filp->f_mode & FMODE_WRITE)) 2261 return -EBADF; 2262 2263 if (!inode_owner_or_capable(idmap, inode)) 2264 return -EACCES; 2265 2266 if (!S_ISREG(inode->i_mode)) 2267 return -EINVAL; 2268 2269 if (filp->f_flags & O_DIRECT) 2270 return -EINVAL; 2271 2272 ret = mnt_want_write_file(filp); 2273 if (ret) 2274 return ret; 2275 2276 inode_lock(inode); 2277 2278 if (!f2fs_disable_compressed_file(inode) || 2279 f2fs_is_pinned_file(inode)) { 2280 ret = -EINVAL; 2281 goto out; 2282 } 2283 2284 if (f2fs_is_atomic_file(inode)) 2285 goto out; 2286 2287 ret = f2fs_convert_inline_inode(inode); 2288 if (ret) 2289 goto out; 2290 2291 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 2292 f2fs_down_write(&fi->i_gc_rwsem[READ]); 2293 2294 /* 2295 * Should wait end_io to count F2FS_WB_CP_DATA correctly by 2296 * f2fs_is_atomic_file. 
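 * (note: filemap_write_and_wait_range() below flushes and waits for any dirty pages before FI_ATOMIC_FILE is set on the inode)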
2297 */ 2298 if (get_dirty_pages(inode)) 2299 f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u", 2300 inode->i_ino, get_dirty_pages(inode)); 2301 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 2302 if (ret) 2303 goto out_unlock; 2304 2305 /* Check if the inode already has a COW inode */ 2306 if (fi->cow_inode == NULL) { 2307 /* Create a COW inode for atomic write */ 2308 struct dentry *dentry = file_dentry(filp); 2309 struct inode *dir = d_inode(dentry->d_parent); 2310 2311 ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode); 2312 if (ret) 2313 goto out_unlock; 2314 2315 set_inode_flag(fi->cow_inode, FI_COW_FILE); 2316 clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); 2317 2318 /* Set the COW inode's atomic_inode to the atomic inode */ 2319 F2FS_I(fi->cow_inode)->atomic_inode = inode; 2320 } else { 2321 /* Reuse the already created COW inode */ 2322 f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode)); 2323 2324 invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1); 2325 2326 ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); 2327 if (ret) 2328 goto out_unlock; 2329 } 2330 2331 f2fs_write_inode(inode, NULL); 2332 2333 stat_inc_atomic_inode(inode); 2334 2335 set_inode_flag(inode, FI_ATOMIC_FILE); 2336 2337 isize = i_size_read(inode); 2338 fi->original_i_size = isize; 2339 if (truncate) { 2340 set_inode_flag(inode, FI_ATOMIC_REPLACE); 2341 truncate_inode_pages_final(inode->i_mapping); 2342 f2fs_i_size_write(inode, 0); 2343 isize = 0; 2344 } 2345 f2fs_i_size_write(fi->cow_inode, isize); 2346 2347 out_unlock: 2348 f2fs_up_write(&fi->i_gc_rwsem[READ]); 2349 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 2350 if (ret) 2351 goto out; 2352 2353 f2fs_update_time(sbi, REQ_TIME); 2354 fi->atomic_write_task = current; 2355 stat_update_max_atomic_write(inode); 2356 fi->atomic_write_cnt = 0; 2357 out: 2358 inode_unlock(inode); 2359 mnt_drop_write_file(filp); 2360 return ret; 2361 } 2362 2363 static int f2fs_ioc_commit_atomic_write(struct file *filp) 2364 { 2365 struct inode *inode = file_inode(filp); 2366 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2367 int ret; 2368 2369 if (!(filp->f_mode & FMODE_WRITE)) 2370 return -EBADF; 2371 2372 if (!inode_owner_or_capable(idmap, inode)) 2373 return -EACCES; 2374 2375 ret = mnt_want_write_file(filp); 2376 if (ret) 2377 return ret; 2378 2379 f2fs_balance_fs(F2FS_I_SB(inode), true); 2380 2381 inode_lock(inode); 2382 2383 if (f2fs_is_atomic_file(inode)) { 2384 ret = f2fs_commit_atomic_write(inode); 2385 if (!ret) 2386 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); 2387 2388 f2fs_abort_atomic_write(inode, ret); 2389 } else { 2390 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); 2391 } 2392 2393 inode_unlock(inode); 2394 mnt_drop_write_file(filp); 2395 return ret; 2396 } 2397 2398 static int f2fs_ioc_abort_atomic_write(struct file *filp) 2399 { 2400 struct inode *inode = file_inode(filp); 2401 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2402 int ret; 2403 2404 if (!(filp->f_mode & FMODE_WRITE)) 2405 return -EBADF; 2406 2407 if (!inode_owner_or_capable(idmap, inode)) 2408 return -EACCES; 2409 2410 ret = mnt_want_write_file(filp); 2411 if (ret) 2412 return ret; 2413 2414 inode_lock(inode); 2415 2416 f2fs_abort_atomic_write(inode, true); 2417 2418 inode_unlock(inode); 2419 2420 mnt_drop_write_file(filp); 2421 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2422 return ret; 2423 } 2424 2425 int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, 2426 bool readonly, bool need_lock) 2427 { 2428 struct 
super_block *sb = sbi->sb; 2429 int ret = 0; 2430 2431 switch (flag) { 2432 case F2FS_GOING_DOWN_FULLSYNC: 2433 ret = bdev_freeze(sb->s_bdev); 2434 if (ret) 2435 goto out; 2436 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2437 bdev_thaw(sb->s_bdev); 2438 break; 2439 case F2FS_GOING_DOWN_METASYNC: 2440 /* do checkpoint only */ 2441 ret = f2fs_sync_fs(sb, 1); 2442 if (ret) { 2443 if (ret == -EIO) 2444 ret = 0; 2445 goto out; 2446 } 2447 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2448 break; 2449 case F2FS_GOING_DOWN_NOSYNC: 2450 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2451 break; 2452 case F2FS_GOING_DOWN_METAFLUSH: 2453 f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); 2454 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2455 break; 2456 case F2FS_GOING_DOWN_NEED_FSCK: 2457 set_sbi_flag(sbi, SBI_NEED_FSCK); 2458 set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); 2459 set_sbi_flag(sbi, SBI_IS_DIRTY); 2460 /* do checkpoint only */ 2461 ret = f2fs_sync_fs(sb, 1); 2462 if (ret == -EIO) 2463 ret = 0; 2464 goto out; 2465 default: 2466 ret = -EINVAL; 2467 goto out; 2468 } 2469 2470 if (readonly) 2471 goto out; 2472 2473 /* 2474 * grab sb->s_umount to avoid racing w/ remount() and other shutdown 2475 * paths. 2476 */ 2477 if (need_lock) 2478 down_write(&sbi->sb->s_umount); 2479 2480 f2fs_stop_gc_thread(sbi); 2481 f2fs_stop_discard_thread(sbi); 2482 2483 f2fs_drop_discard_cmd(sbi); 2484 clear_opt(sbi, DISCARD); 2485 2486 if (need_lock) 2487 up_write(&sbi->sb->s_umount); 2488 2489 f2fs_update_time(sbi, REQ_TIME); 2490 out: 2491 2492 trace_f2fs_shutdown(sbi, flag, ret); 2493 2494 return ret; 2495 } 2496 2497 static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) 2498 { 2499 struct inode *inode = file_inode(filp); 2500 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2501 __u32 in; 2502 int ret; 2503 bool need_drop = false, readonly = false; 2504 2505 if (!capable(CAP_SYS_ADMIN)) 2506 return -EPERM; 2507 2508 if (get_user(in, (__u32 __user *)arg)) 2509 return -EFAULT; 2510 2511 if (in != F2FS_GOING_DOWN_FULLSYNC) { 2512 ret = mnt_want_write_file(filp); 2513 if (ret) { 2514 if (ret != -EROFS) 2515 return ret; 2516 2517 /* fallback to nosync shutdown for readonly fs */ 2518 in = F2FS_GOING_DOWN_NOSYNC; 2519 readonly = true; 2520 } else { 2521 need_drop = true; 2522 } 2523 } 2524 2525 ret = f2fs_do_shutdown(sbi, in, readonly, true); 2526 2527 if (need_drop) 2528 mnt_drop_write_file(filp); 2529 2530 return ret; 2531 } 2532 2533 static int f2fs_keep_noreuse_range(struct inode *inode, 2534 loff_t offset, loff_t len) 2535 { 2536 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2537 u64 max_bytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 2538 u64 start, end; 2539 int ret = 0; 2540 2541 if (!S_ISREG(inode->i_mode)) 2542 return 0; 2543 2544 if (offset >= max_bytes || len > max_bytes || 2545 (offset + len) > max_bytes) 2546 return 0; 2547 2548 start = offset >> PAGE_SHIFT; 2549 end = DIV_ROUND_UP(offset + len, PAGE_SIZE); 2550 2551 inode_lock(inode); 2552 if (f2fs_is_atomic_file(inode)) { 2553 inode_unlock(inode); 2554 return 0; 2555 } 2556 2557 spin_lock(&sbi->inode_lock[DONATE_INODE]); 2558 /* let's remove the range, if len = 0 */ 2559 if (!len) { 2560 if (!list_empty(&F2FS_I(inode)->gdonate_list)) { 2561 list_del_init(&F2FS_I(inode)->gdonate_list); 2562 sbi->donate_files--; 2563 if (is_inode_flag_set(inode, FI_DONATE_FINISHED)) 2564 ret = -EALREADY; 2565 else 2566 set_inode_flag(inode, FI_DONATE_FINISHED); 2567 } else 2568 ret = -ENOENT; 2569 } 
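/* for a non-zero len, register or refresh this inode's donation range */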
else { 2570 if (list_empty(&F2FS_I(inode)->gdonate_list)) { 2571 list_add_tail(&F2FS_I(inode)->gdonate_list, 2572 &sbi->inode_list[DONATE_INODE]); 2573 sbi->donate_files++; 2574 } else { 2575 list_move_tail(&F2FS_I(inode)->gdonate_list, 2576 &sbi->inode_list[DONATE_INODE]); 2577 } 2578 F2FS_I(inode)->donate_start = start; 2579 F2FS_I(inode)->donate_end = end - 1; 2580 clear_inode_flag(inode, FI_DONATE_FINISHED); 2581 } 2582 spin_unlock(&sbi->inode_lock[DONATE_INODE]); 2583 inode_unlock(inode); 2584 2585 return ret; 2586 } 2587 2588 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) 2589 { 2590 struct inode *inode = file_inode(filp); 2591 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2592 struct fstrim_range range; 2593 int ret; 2594 2595 if (!capable(CAP_SYS_ADMIN)) 2596 return -EPERM; 2597 2598 if (!f2fs_hw_support_discard(sbi)) 2599 return -EOPNOTSUPP; 2600 2601 if (copy_from_user(&range, (struct fstrim_range __user *)arg, 2602 sizeof(range))) 2603 return -EFAULT; 2604 2605 ret = mnt_want_write_file(filp); 2606 if (ret) 2607 return ret; 2608 2609 range.minlen = max_t(unsigned int, range.minlen, 2610 f2fs_hw_discard_granularity(sbi)); 2611 ret = f2fs_trim_fs(sbi, &range); 2612 mnt_drop_write_file(filp); 2613 if (ret < 0) 2614 return ret; 2615 2616 if (copy_to_user((struct fstrim_range __user *)arg, &range, 2617 sizeof(range))) 2618 return -EFAULT; 2619 f2fs_update_time(sbi, REQ_TIME); 2620 return 0; 2621 } 2622 2623 static bool uuid_is_nonzero(__u8 u[16]) 2624 { 2625 int i; 2626 2627 for (i = 0; i < 16; i++) 2628 if (u[i]) 2629 return true; 2630 return false; 2631 } 2632 2633 static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) 2634 { 2635 struct inode *inode = file_inode(filp); 2636 int ret; 2637 2638 if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode))) 2639 return -EOPNOTSUPP; 2640 2641 ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg); 2642 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2643 return ret; 2644 } 2645 2646 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) 2647 { 2648 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2649 return -EOPNOTSUPP; 2650 return fscrypt_ioctl_get_policy(filp, (void __user *)arg); 2651 } 2652 2653 static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) 2654 { 2655 struct inode *inode = file_inode(filp); 2656 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2657 u8 encrypt_pw_salt[16]; 2658 int err; 2659 2660 if (!f2fs_sb_has_encrypt(sbi)) 2661 return -EOPNOTSUPP; 2662 2663 err = mnt_want_write_file(filp); 2664 if (err) 2665 return err; 2666 2667 f2fs_down_write(&sbi->sb_lock); 2668 2669 if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) 2670 goto got_it; 2671 2672 /* update superblock with uuid */ 2673 generate_random_uuid(sbi->raw_super->encrypt_pw_salt); 2674 2675 err = f2fs_commit_super(sbi, false); 2676 if (err) { 2677 /* undo new data */ 2678 memset(sbi->raw_super->encrypt_pw_salt, 0, 16); 2679 goto out_err; 2680 } 2681 got_it: 2682 memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16); 2683 out_err: 2684 f2fs_up_write(&sbi->sb_lock); 2685 mnt_drop_write_file(filp); 2686 2687 if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16)) 2688 err = -EFAULT; 2689 2690 return err; 2691 } 2692 2693 static int f2fs_ioc_get_encryption_policy_ex(struct file *filp, 2694 unsigned long arg) 2695 { 2696 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2697 return -EOPNOTSUPP; 2698 2699 return fscrypt_ioctl_get_policy_ex(filp, (void __user 
*)arg); 2700 } 2701 2702 static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg) 2703 { 2704 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2705 return -EOPNOTSUPP; 2706 2707 return fscrypt_ioctl_add_key(filp, (void __user *)arg); 2708 } 2709 2710 static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg) 2711 { 2712 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2713 return -EOPNOTSUPP; 2714 2715 return fscrypt_ioctl_remove_key(filp, (void __user *)arg); 2716 } 2717 2718 static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp, 2719 unsigned long arg) 2720 { 2721 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2722 return -EOPNOTSUPP; 2723 2724 return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg); 2725 } 2726 2727 static int f2fs_ioc_get_encryption_key_status(struct file *filp, 2728 unsigned long arg) 2729 { 2730 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2731 return -EOPNOTSUPP; 2732 2733 return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); 2734 } 2735 2736 static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg) 2737 { 2738 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2739 return -EOPNOTSUPP; 2740 2741 return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); 2742 } 2743 2744 static int f2fs_ioc_gc(struct file *filp, unsigned long arg) 2745 { 2746 struct inode *inode = file_inode(filp); 2747 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2748 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, 2749 .no_bg_gc = false, 2750 .should_migrate_blocks = false, 2751 .nr_free_secs = 0 }; 2752 __u32 sync; 2753 int ret; 2754 2755 if (!capable(CAP_SYS_ADMIN)) 2756 return -EPERM; 2757 2758 if (get_user(sync, (__u32 __user *)arg)) 2759 return -EFAULT; 2760 2761 if (f2fs_readonly(sbi->sb)) 2762 return -EROFS; 2763 2764 ret = mnt_want_write_file(filp); 2765 if (ret) 2766 return ret; 2767 2768 if (!sync) { 2769 if (!f2fs_down_write_trylock(&sbi->gc_lock)) { 2770 ret = -EBUSY; 2771 goto out; 2772 } 2773 } else { 2774 f2fs_down_write(&sbi->gc_lock); 2775 } 2776 2777 gc_control.init_gc_type = sync ? FG_GC : BG_GC; 2778 gc_control.err_gc_skipped = sync; 2779 stat_inc_gc_call_count(sbi, FOREGROUND); 2780 ret = f2fs_gc(sbi, &gc_control); 2781 out: 2782 mnt_drop_write_file(filp); 2783 return ret; 2784 } 2785 2786 static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range) 2787 { 2788 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); 2789 struct f2fs_gc_control gc_control = { 2790 .init_gc_type = range->sync ? 
FG_GC : BG_GC, 2791 .no_bg_gc = false, 2792 .should_migrate_blocks = false, 2793 .err_gc_skipped = range->sync, 2794 .nr_free_secs = 0 }; 2795 u64 end; 2796 int ret; 2797 2798 if (!capable(CAP_SYS_ADMIN)) 2799 return -EPERM; 2800 if (f2fs_readonly(sbi->sb)) 2801 return -EROFS; 2802 2803 end = range->start + range->len; 2804 if (end < range->start || range->start < MAIN_BLKADDR(sbi) || 2805 end >= MAX_BLKADDR(sbi)) 2806 return -EINVAL; 2807 2808 ret = mnt_want_write_file(filp); 2809 if (ret) 2810 return ret; 2811 2812 do_more: 2813 if (!range->sync) { 2814 if (!f2fs_down_write_trylock(&sbi->gc_lock)) { 2815 ret = -EBUSY; 2816 goto out; 2817 } 2818 } else { 2819 f2fs_down_write(&sbi->gc_lock); 2820 } 2821 2822 gc_control.victim_segno = GET_SEGNO(sbi, range->start); 2823 stat_inc_gc_call_count(sbi, FOREGROUND); 2824 ret = f2fs_gc(sbi, &gc_control); 2825 if (ret) { 2826 if (ret == -EBUSY) 2827 ret = -EAGAIN; 2828 goto out; 2829 } 2830 range->start += CAP_BLKS_PER_SEC(sbi); 2831 if (range->start <= end) 2832 goto do_more; 2833 out: 2834 mnt_drop_write_file(filp); 2835 return ret; 2836 } 2837 2838 static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) 2839 { 2840 struct f2fs_gc_range range; 2841 2842 if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, 2843 sizeof(range))) 2844 return -EFAULT; 2845 return __f2fs_ioc_gc_range(filp, &range); 2846 } 2847 2848 static int f2fs_ioc_write_checkpoint(struct file *filp) 2849 { 2850 struct inode *inode = file_inode(filp); 2851 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2852 int ret; 2853 2854 if (!capable(CAP_SYS_ADMIN)) 2855 return -EPERM; 2856 2857 if (f2fs_readonly(sbi->sb)) 2858 return -EROFS; 2859 2860 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 2861 f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled."); 2862 return -EINVAL; 2863 } 2864 2865 ret = mnt_want_write_file(filp); 2866 if (ret) 2867 return ret; 2868 2869 ret = f2fs_sync_fs(sbi->sb, 1); 2870 2871 mnt_drop_write_file(filp); 2872 return ret; 2873 } 2874 2875 static int f2fs_defragment_range(struct f2fs_sb_info *sbi, 2876 struct file *filp, 2877 struct f2fs_defragment *range) 2878 { 2879 struct inode *inode = file_inode(filp); 2880 struct f2fs_map_blocks map = { .m_next_extent = NULL, 2881 .m_seg_type = NO_CHECK_TYPE, 2882 .m_may_create = false }; 2883 struct extent_info ei = {}; 2884 pgoff_t pg_start, pg_end, next_pgofs; 2885 unsigned int total = 0, sec_num; 2886 block_t blk_end = 0; 2887 bool fragmented = false; 2888 int err; 2889 2890 f2fs_balance_fs(sbi, true); 2891 2892 inode_lock(inode); 2893 pg_start = range->start >> PAGE_SHIFT; 2894 pg_end = min_t(pgoff_t, 2895 (range->start + range->len) >> PAGE_SHIFT, 2896 DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE)); 2897 2898 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) || 2899 f2fs_is_atomic_file(inode)) { 2900 err = -EINVAL; 2901 goto unlock_out; 2902 } 2903 2904 /* if in-place-update policy is enabled, don't waste time here */ 2905 set_inode_flag(inode, FI_OPU_WRITE); 2906 if (f2fs_should_update_inplace(inode, NULL)) { 2907 err = -EINVAL; 2908 goto out; 2909 } 2910 2911 /* writeback all dirty pages in the range */ 2912 err = filemap_write_and_wait_range(inode->i_mapping, 2913 pg_start << PAGE_SHIFT, 2914 (pg_end << PAGE_SHIFT) - 1); 2915 if (err) 2916 goto out; 2917 2918 /* 2919 * lookup mapping info in extent cache, skip defragmenting if physical 2920 * block addresses are continuous. 
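 * (a single cached extent starting at pg_start that reaches pg_end means the whole range is already contiguous on disk, so there is nothing to defragment)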
2921 */ 2922 if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) { 2923 if ((pgoff_t)ei.fofs + ei.len >= pg_end) 2924 goto out; 2925 } 2926 2927 map.m_lblk = pg_start; 2928 map.m_next_pgofs = &next_pgofs; 2929 2930 /* 2931 * lookup mapping info in dnode page cache, skip defragmenting if all 2932 * physical block addresses are continuous even if there are hole(s) 2933 * in logical blocks. 2934 */ 2935 while (map.m_lblk < pg_end) { 2936 map.m_len = pg_end - map.m_lblk; 2937 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT); 2938 if (err) 2939 goto out; 2940 2941 if (!(map.m_flags & F2FS_MAP_FLAGS)) { 2942 map.m_lblk = next_pgofs; 2943 continue; 2944 } 2945 2946 if (blk_end && blk_end != map.m_pblk) 2947 fragmented = true; 2948 2949 /* record total count of blocks that we're going to move */ 2950 total += map.m_len; 2951 2952 blk_end = map.m_pblk + map.m_len; 2953 2954 map.m_lblk += map.m_len; 2955 } 2956 2957 if (!fragmented) { 2958 total = 0; 2959 goto out; 2960 } 2961 2962 sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi)); 2963 2964 /* 2965 * make sure there are enough free sections for LFS allocation; this can 2966 * avoid defragmentation running in SSR mode when free sections are allocated 2967 * intensively 2968 */ 2969 if (has_not_enough_free_secs(sbi, 0, sec_num)) { 2970 err = -EAGAIN; 2971 goto out; 2972 } 2973 2974 map.m_lblk = pg_start; 2975 map.m_len = pg_end - pg_start; 2976 total = 0; 2977 2978 while (map.m_lblk < pg_end) { 2979 pgoff_t idx; 2980 int cnt = 0; 2981 2982 do_map: 2983 map.m_len = pg_end - map.m_lblk; 2984 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT); 2985 if (err) 2986 goto clear_out; 2987 2988 if (!(map.m_flags & F2FS_MAP_FLAGS)) { 2989 map.m_lblk = next_pgofs; 2990 goto check; 2991 } 2992 2993 set_inode_flag(inode, FI_SKIP_WRITES); 2994 2995 idx = map.m_lblk; 2996 while (idx < map.m_lblk + map.m_len && 2997 cnt < BLKS_PER_SEG(sbi)) { 2998 struct folio *folio; 2999 3000 folio = f2fs_get_lock_data_folio(inode, idx, true); 3001 if (IS_ERR(folio)) { 3002 err = PTR_ERR(folio); 3003 goto clear_out; 3004 } 3005 3006 f2fs_folio_wait_writeback(folio, DATA, true, true); 3007 3008 folio_mark_dirty(folio); 3009 folio_set_f2fs_gcing(folio); 3010 f2fs_folio_put(folio, true); 3011 3012 idx++; 3013 cnt++; 3014 total++; 3015 } 3016 3017 map.m_lblk = idx; 3018 check: 3019 if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi)) 3020 goto do_map; 3021 3022 clear_inode_flag(inode, FI_SKIP_WRITES); 3023 3024 err = filemap_fdatawrite(inode->i_mapping); 3025 if (err) 3026 goto out; 3027 } 3028 clear_out: 3029 clear_inode_flag(inode, FI_SKIP_WRITES); 3030 out: 3031 clear_inode_flag(inode, FI_OPU_WRITE); 3032 unlock_out: 3033 inode_unlock(inode); 3034 if (!err) 3035 range->len = (u64)total << PAGE_SHIFT; 3036 return err; 3037 } 3038 3039 static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) 3040 { 3041 struct inode *inode = file_inode(filp); 3042 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3043 struct f2fs_defragment range; 3044 int err; 3045 3046 if (!capable(CAP_SYS_ADMIN)) 3047 return -EPERM; 3048 3049 if (!S_ISREG(inode->i_mode)) 3050 return -EINVAL; 3051 3052 if (f2fs_readonly(sbi->sb)) 3053 return -EROFS; 3054 3055 if (copy_from_user(&range, (struct f2fs_defragment __user *)arg, 3056 sizeof(range))) 3057 return -EFAULT; 3058 3059 /* verify alignment of offset & size */ 3060 if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1)) 3061 return -EINVAL; 3062 3063 if (unlikely((range.start + range.len) >> PAGE_SHIFT > 3064
max_file_blocks(inode))) 3065 return -EINVAL; 3066 3067 err = mnt_want_write_file(filp); 3068 if (err) 3069 return err; 3070 3071 err = f2fs_defragment_range(sbi, filp, &range); 3072 mnt_drop_write_file(filp); 3073 3074 if (range.len) 3075 f2fs_update_time(sbi, REQ_TIME); 3076 if (err < 0) 3077 return err; 3078 3079 if (copy_to_user((struct f2fs_defragment __user *)arg, &range, 3080 sizeof(range))) 3081 return -EFAULT; 3082 3083 return 0; 3084 } 3085 3086 static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, 3087 struct file *file_out, loff_t pos_out, size_t len) 3088 { 3089 struct inode *src = file_inode(file_in); 3090 struct inode *dst = file_inode(file_out); 3091 struct f2fs_sb_info *sbi = F2FS_I_SB(src); 3092 size_t olen = len, dst_max_i_size = 0; 3093 size_t dst_osize; 3094 int ret; 3095 3096 if (file_in->f_path.mnt != file_out->f_path.mnt || 3097 src->i_sb != dst->i_sb) 3098 return -EXDEV; 3099 3100 if (unlikely(f2fs_readonly(src->i_sb))) 3101 return -EROFS; 3102 3103 if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode)) 3104 return -EINVAL; 3105 3106 if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst)) 3107 return -EOPNOTSUPP; 3108 3109 if (pos_out < 0 || pos_in < 0) 3110 return -EINVAL; 3111 3112 if (src == dst) { 3113 if (pos_in == pos_out) 3114 return 0; 3115 if (pos_out > pos_in && pos_out < pos_in + len) 3116 return -EINVAL; 3117 } 3118 3119 inode_lock(src); 3120 if (src != dst) { 3121 ret = -EBUSY; 3122 if (!inode_trylock(dst)) 3123 goto out; 3124 } 3125 3126 if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) || 3127 f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) { 3128 ret = -EOPNOTSUPP; 3129 goto out_unlock; 3130 } 3131 3132 if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) { 3133 ret = -EINVAL; 3134 goto out_unlock; 3135 } 3136 3137 ret = -EINVAL; 3138 if (pos_in + len > src->i_size || pos_in + len < pos_in) 3139 goto out_unlock; 3140 if (len == 0) 3141 olen = len = src->i_size - pos_in; 3142 if (pos_in + len == src->i_size) 3143 len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in; 3144 if (len == 0) { 3145 ret = 0; 3146 goto out_unlock; 3147 } 3148 3149 dst_osize = dst->i_size; 3150 if (pos_out + olen > dst->i_size) 3151 dst_max_i_size = pos_out + olen; 3152 3153 /* verify the end result is block aligned */ 3154 if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) || 3155 !IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) || 3156 !IS_ALIGNED(pos_out, F2FS_BLKSIZE)) 3157 goto out_unlock; 3158 3159 ret = f2fs_convert_inline_inode(src); 3160 if (ret) 3161 goto out_unlock; 3162 3163 ret = f2fs_convert_inline_inode(dst); 3164 if (ret) 3165 goto out_unlock; 3166 3167 /* write out all dirty pages from offset */ 3168 ret = filemap_write_and_wait_range(src->i_mapping, 3169 pos_in, pos_in + len); 3170 if (ret) 3171 goto out_unlock; 3172 3173 ret = filemap_write_and_wait_range(dst->i_mapping, 3174 pos_out, pos_out + len); 3175 if (ret) 3176 goto out_unlock; 3177 3178 f2fs_balance_fs(sbi, true); 3179 3180 f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); 3181 if (src != dst) { 3182 ret = -EBUSY; 3183 if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) 3184 goto out_src; 3185 } 3186 3187 f2fs_lock_op(sbi); 3188 ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in), 3189 F2FS_BYTES_TO_BLK(pos_out), 3190 F2FS_BYTES_TO_BLK(len), false); 3191 3192 if (!ret) { 3193 if (dst_max_i_size) 3194 f2fs_i_size_write(dst, dst_max_i_size); 3195 else if (dst_osize != dst->i_size) 3196 f2fs_i_size_write(dst, dst_osize); 3197 } 3198 f2fs_unlock_op(sbi); 3199 3200 if (src != dst) 3201 
f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); 3202 out_src: 3203 f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); 3204 if (ret) 3205 goto out_unlock; 3206 3207 inode_set_mtime_to_ts(src, inode_set_ctime_current(src)); 3208 f2fs_mark_inode_dirty_sync(src, false); 3209 if (src != dst) { 3210 inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst)); 3211 f2fs_mark_inode_dirty_sync(dst, false); 3212 } 3213 f2fs_update_time(sbi, REQ_TIME); 3214 3215 out_unlock: 3216 if (src != dst) 3217 inode_unlock(dst); 3218 out: 3219 inode_unlock(src); 3220 return ret; 3221 } 3222 3223 static int __f2fs_ioc_move_range(struct file *filp, 3224 struct f2fs_move_range *range) 3225 { 3226 int err; 3227 3228 if (!(filp->f_mode & FMODE_READ) || 3229 !(filp->f_mode & FMODE_WRITE)) 3230 return -EBADF; 3231 3232 CLASS(fd, dst)(range->dst_fd); 3233 if (fd_empty(dst)) 3234 return -EBADF; 3235 3236 if (!(fd_file(dst)->f_mode & FMODE_WRITE)) 3237 return -EBADF; 3238 3239 err = mnt_want_write_file(filp); 3240 if (err) 3241 return err; 3242 3243 err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst), 3244 range->pos_out, range->len); 3245 3246 mnt_drop_write_file(filp); 3247 return err; 3248 } 3249 3250 static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) 3251 { 3252 struct f2fs_move_range range; 3253 3254 if (copy_from_user(&range, (struct f2fs_move_range __user *)arg, 3255 sizeof(range))) 3256 return -EFAULT; 3257 return __f2fs_ioc_move_range(filp, &range); 3258 } 3259 3260 static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) 3261 { 3262 struct inode *inode = file_inode(filp); 3263 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3264 struct sit_info *sm = SIT_I(sbi); 3265 unsigned int start_segno = 0, end_segno = 0; 3266 unsigned int dev_start_segno = 0, dev_end_segno = 0; 3267 struct f2fs_flush_device range; 3268 struct f2fs_gc_control gc_control = { 3269 .init_gc_type = FG_GC, 3270 .should_migrate_blocks = true, 3271 .err_gc_skipped = true, 3272 .nr_free_secs = 0 }; 3273 int ret; 3274 3275 if (!capable(CAP_SYS_ADMIN)) 3276 return -EPERM; 3277 3278 if (f2fs_readonly(sbi->sb)) 3279 return -EROFS; 3280 3281 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) 3282 return -EINVAL; 3283 3284 if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, 3285 sizeof(range))) 3286 return -EFAULT; 3287 3288 if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || 3289 __is_large_section(sbi)) { 3290 f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1", 3291 range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi)); 3292 return -EINVAL; 3293 } 3294 3295 ret = mnt_want_write_file(filp); 3296 if (ret) 3297 return ret; 3298 3299 if (range.dev_num != 0) 3300 dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk); 3301 dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk); 3302 3303 start_segno = sm->last_victim[FLUSH_DEVICE]; 3304 if (start_segno < dev_start_segno || start_segno >= dev_end_segno) 3305 start_segno = dev_start_segno; 3306 end_segno = min(start_segno + range.segments, dev_end_segno); 3307 3308 while (start_segno < end_segno) { 3309 if (!f2fs_down_write_trylock(&sbi->gc_lock)) { 3310 ret = -EBUSY; 3311 goto out; 3312 } 3313 sm->last_victim[GC_CB] = end_segno + 1; 3314 sm->last_victim[GC_GREEDY] = end_segno + 1; 3315 sm->last_victim[ALLOC_NEXT] = end_segno + 1; 3316 3317 gc_control.victim_segno = start_segno; 3318 stat_inc_gc_call_count(sbi, FOREGROUND); 3319 ret = f2fs_gc(sbi, &gc_control); 3320 if (ret == -EAGAIN) 3321 ret = 0; 3322 else if (ret < 
0) 3323 break; 3324 start_segno++; 3325 } 3326 out: 3327 mnt_drop_write_file(filp); 3328 return ret; 3329 } 3330 3331 static int f2fs_ioc_get_features(struct file *filp, unsigned long arg) 3332 { 3333 struct inode *inode = file_inode(filp); 3334 u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature); 3335 3336 /* Must validate to set it with SQLite behavior in Android. */ 3337 sb_feature |= F2FS_FEATURE_ATOMIC_WRITE; 3338 3339 return put_user(sb_feature, (u32 __user *)arg); 3340 } 3341 3342 #ifdef CONFIG_QUOTA 3343 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) 3344 { 3345 struct dquot *transfer_to[MAXQUOTAS] = {}; 3346 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3347 struct super_block *sb = sbi->sb; 3348 int err; 3349 3350 transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); 3351 if (IS_ERR(transfer_to[PRJQUOTA])) 3352 return PTR_ERR(transfer_to[PRJQUOTA]); 3353 3354 err = __dquot_transfer(inode, transfer_to); 3355 if (err) 3356 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); 3357 dqput(transfer_to[PRJQUOTA]); 3358 return err; 3359 } 3360 3361 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) 3362 { 3363 struct f2fs_inode_info *fi = F2FS_I(inode); 3364 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3365 struct f2fs_inode *ri = NULL; 3366 kprojid_t kprojid; 3367 int err; 3368 3369 if (!f2fs_sb_has_project_quota(sbi)) { 3370 if (projid != F2FS_DEF_PROJID) 3371 return -EOPNOTSUPP; 3372 else 3373 return 0; 3374 } 3375 3376 if (!f2fs_has_extra_attr(inode)) 3377 return -EOPNOTSUPP; 3378 3379 kprojid = make_kprojid(&init_user_ns, (projid_t)projid); 3380 3381 if (projid_eq(kprojid, fi->i_projid)) 3382 return 0; 3383 3384 err = -EPERM; 3385 /* Is it quota file? Do not allow user to mess with it */ 3386 if (IS_NOQUOTA(inode)) 3387 return err; 3388 3389 if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) 3390 return -EOVERFLOW; 3391 3392 err = f2fs_dquot_initialize(inode); 3393 if (err) 3394 return err; 3395 3396 f2fs_lock_op(sbi); 3397 err = f2fs_transfer_project_quota(inode, kprojid); 3398 if (err) 3399 goto out_unlock; 3400 3401 fi->i_projid = kprojid; 3402 inode_set_ctime_current(inode); 3403 f2fs_mark_inode_dirty_sync(inode, true); 3404 out_unlock: 3405 f2fs_unlock_op(sbi); 3406 return err; 3407 } 3408 #else 3409 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) 3410 { 3411 return 0; 3412 } 3413 3414 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) 3415 { 3416 if (projid != F2FS_DEF_PROJID) 3417 return -EOPNOTSUPP; 3418 return 0; 3419 } 3420 #endif 3421 3422 int f2fs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) 3423 { 3424 struct inode *inode = d_inode(dentry); 3425 struct f2fs_inode_info *fi = F2FS_I(inode); 3426 u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags); 3427 3428 if (IS_ENCRYPTED(inode)) 3429 fsflags |= FS_ENCRYPT_FL; 3430 if (IS_VERITY(inode)) 3431 fsflags |= FS_VERITY_FL; 3432 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) 3433 fsflags |= FS_INLINE_DATA_FL; 3434 if (is_inode_flag_set(inode, FI_PIN_FILE)) 3435 fsflags |= FS_NOCOW_FL; 3436 3437 fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL); 3438 3439 if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) 3440 fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid); 3441 3442 return 0; 3443 } 3444 3445 int f2fs_fileattr_set(struct mnt_idmap *idmap, 3446 struct dentry *dentry, struct file_kattr *fa) 3447 { 3448 struct inode *inode = d_inode(dentry); 3449 u32 fsflags = fa->flags, mask = 
F2FS_SETTABLE_FS_FL; 3450 u32 iflags; 3451 int err; 3452 3453 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 3454 return -EIO; 3455 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) 3456 return -ENOSPC; 3457 if (fsflags & ~F2FS_GETTABLE_FS_FL) 3458 return -EOPNOTSUPP; 3459 fsflags &= F2FS_SETTABLE_FS_FL; 3460 if (!fa->flags_valid) 3461 mask &= FS_COMMON_FL; 3462 3463 iflags = f2fs_fsflags_to_iflags(fsflags); 3464 if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) 3465 return -EOPNOTSUPP; 3466 3467 err = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(mask)); 3468 if (!err) 3469 err = f2fs_ioc_setproject(inode, fa->fsx_projid); 3470 3471 return err; 3472 } 3473 3474 int f2fs_pin_file_control(struct inode *inode, bool inc) 3475 { 3476 struct f2fs_inode_info *fi = F2FS_I(inode); 3477 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3478 3479 if (IS_DEVICE_ALIASING(inode)) 3480 return -EINVAL; 3481 3482 if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { 3483 f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", 3484 __func__, inode->i_ino, fi->i_gc_failures); 3485 clear_inode_flag(inode, FI_PIN_FILE); 3486 return -EAGAIN; 3487 } 3488 3489 /* Use i_gc_failures for normal file as a risk signal. */ 3490 if (inc) 3491 f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); 3492 3493 return 0; 3494 } 3495 3496 static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) 3497 { 3498 struct inode *inode = file_inode(filp); 3499 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3500 __u32 pin; 3501 int ret = 0; 3502 3503 if (get_user(pin, (__u32 __user *)arg)) 3504 return -EFAULT; 3505 3506 if (!S_ISREG(inode->i_mode)) 3507 return -EINVAL; 3508 3509 if (f2fs_readonly(sbi->sb)) 3510 return -EROFS; 3511 3512 if (!pin && IS_DEVICE_ALIASING(inode)) 3513 return -EOPNOTSUPP; 3514 3515 ret = mnt_want_write_file(filp); 3516 if (ret) 3517 return ret; 3518 3519 inode_lock(inode); 3520 3521 if (f2fs_is_atomic_file(inode)) { 3522 ret = -EINVAL; 3523 goto out; 3524 } 3525 3526 if (!pin) { 3527 clear_inode_flag(inode, FI_PIN_FILE); 3528 f2fs_i_gc_failures_write(inode, 0); 3529 goto done; 3530 } else if (f2fs_is_pinned_file(inode)) { 3531 goto done; 3532 } 3533 3534 if (F2FS_HAS_BLOCKS(inode)) { 3535 ret = -EFBIG; 3536 goto out; 3537 } 3538 3539 /* Let's allow file pinning on zoned device. */ 3540 if (!f2fs_sb_has_blkzoned(sbi) && 3541 f2fs_should_update_outplace(inode, NULL)) { 3542 ret = -EINVAL; 3543 goto out; 3544 } 3545 3546 if (f2fs_pin_file_control(inode, false)) { 3547 ret = -EAGAIN; 3548 goto out; 3549 } 3550 3551 ret = f2fs_convert_inline_inode(inode); 3552 if (ret) 3553 goto out; 3554 3555 if (!f2fs_disable_compressed_file(inode)) { 3556 ret = -EOPNOTSUPP; 3557 goto out; 3558 } 3559 3560 set_inode_flag(inode, FI_PIN_FILE); 3561 ret = F2FS_I(inode)->i_gc_failures; 3562 done: 3563 f2fs_update_time(sbi, REQ_TIME); 3564 out: 3565 inode_unlock(inode); 3566 mnt_drop_write_file(filp); 3567 return ret; 3568 } 3569 3570 static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) 3571 { 3572 struct inode *inode = file_inode(filp); 3573 __u32 pin = 0; 3574 3575 if (is_inode_flag_set(inode, FI_PIN_FILE)) 3576 pin = F2FS_I(inode)->i_gc_failures; 3577 return put_user(pin, (u32 __user *)arg); 3578 } 3579 3580 static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg) 3581 { 3582 return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 
1 : 0, 3583 (u32 __user *)arg); 3584 } 3585 3586 static int f2fs_ioc_io_prio(struct file *filp, unsigned long arg) 3587 { 3588 struct inode *inode = file_inode(filp); 3589 __u32 level; 3590 3591 if (get_user(level, (__u32 __user *)arg)) 3592 return -EFAULT; 3593 3594 if (!S_ISREG(inode->i_mode) || level >= F2FS_IOPRIO_MAX) 3595 return -EINVAL; 3596 3597 inode_lock(inode); 3598 F2FS_I(inode)->ioprio_hint = level; 3599 inode_unlock(inode); 3600 return 0; 3601 } 3602 3603 int f2fs_precache_extents(struct inode *inode) 3604 { 3605 struct f2fs_inode_info *fi = F2FS_I(inode); 3606 struct f2fs_map_blocks map; 3607 pgoff_t m_next_extent; 3608 loff_t end; 3609 int err; 3610 3611 if (is_inode_flag_set(inode, FI_NO_EXTENT)) 3612 return -EOPNOTSUPP; 3613 3614 map.m_lblk = 0; 3615 map.m_pblk = 0; 3616 map.m_next_pgofs = NULL; 3617 map.m_next_extent = &m_next_extent; 3618 map.m_seg_type = NO_CHECK_TYPE; 3619 map.m_may_create = false; 3620 end = F2FS_BLK_ALIGN(i_size_read(inode)); 3621 3622 while (map.m_lblk < end) { 3623 map.m_len = end - map.m_lblk; 3624 3625 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 3626 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE); 3627 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 3628 if (err || !map.m_len) 3629 return err; 3630 3631 map.m_lblk = m_next_extent; 3632 } 3633 3634 return 0; 3635 } 3636 3637 static int f2fs_ioc_precache_extents(struct file *filp) 3638 { 3639 return f2fs_precache_extents(file_inode(filp)); 3640 } 3641 3642 static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) 3643 { 3644 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); 3645 __u64 block_count; 3646 3647 if (!capable(CAP_SYS_ADMIN)) 3648 return -EPERM; 3649 3650 if (f2fs_readonly(sbi->sb)) 3651 return -EROFS; 3652 3653 if (copy_from_user(&block_count, (void __user *)arg, 3654 sizeof(block_count))) 3655 return -EFAULT; 3656 3657 return f2fs_resize_fs(filp, block_count); 3658 } 3659 3660 static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg) 3661 { 3662 struct inode *inode = file_inode(filp); 3663 3664 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 3665 3666 if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) { 3667 f2fs_warn(F2FS_I_SB(inode), 3668 "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem", 3669 inode->i_ino); 3670 return -EOPNOTSUPP; 3671 } 3672 3673 return fsverity_ioctl_enable(filp, (const void __user *)arg); 3674 } 3675 3676 static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg) 3677 { 3678 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) 3679 return -EOPNOTSUPP; 3680 3681 return fsverity_ioctl_measure(filp, (void __user *)arg); 3682 } 3683 3684 static int f2fs_ioc_read_verity_metadata(struct file *filp, unsigned long arg) 3685 { 3686 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) 3687 return -EOPNOTSUPP; 3688 3689 return fsverity_ioctl_read_metadata(filp, (const void __user *)arg); 3690 } 3691 3692 static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg) 3693 { 3694 struct inode *inode = file_inode(filp); 3695 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3696 char *vbuf; 3697 int count; 3698 int err = 0; 3699 3700 vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL); 3701 if (!vbuf) 3702 return -ENOMEM; 3703 3704 f2fs_down_read(&sbi->sb_lock); 3705 count = utf16s_to_utf8s(sbi->raw_super->volume_name, 3706 ARRAY_SIZE(sbi->raw_super->volume_name), 3707 UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME); 3708 f2fs_up_read(&sbi->sb_lock); 3709 3710 if (copy_to_user((char __user *)arg, 
vbuf, 3711 min(FSLABEL_MAX, count))) 3712 err = -EFAULT; 3713 3714 kfree(vbuf); 3715 return err; 3716 } 3717 3718 static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) 3719 { 3720 struct inode *inode = file_inode(filp); 3721 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3722 char *vbuf; 3723 int err = 0; 3724 3725 if (!capable(CAP_SYS_ADMIN)) 3726 return -EPERM; 3727 3728 vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX); 3729 if (IS_ERR(vbuf)) 3730 return PTR_ERR(vbuf); 3731 3732 err = mnt_want_write_file(filp); 3733 if (err) 3734 goto out; 3735 3736 f2fs_down_write(&sbi->sb_lock); 3737 3738 memset(sbi->raw_super->volume_name, 0, 3739 sizeof(sbi->raw_super->volume_name)); 3740 utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN, 3741 sbi->raw_super->volume_name, 3742 ARRAY_SIZE(sbi->raw_super->volume_name)); 3743 3744 err = f2fs_commit_super(sbi, false); 3745 3746 f2fs_up_write(&sbi->sb_lock); 3747 3748 mnt_drop_write_file(filp); 3749 out: 3750 kfree(vbuf); 3751 return err; 3752 } 3753 3754 static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks) 3755 { 3756 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 3757 return -EOPNOTSUPP; 3758 3759 if (!f2fs_compressed_file(inode)) 3760 return -EINVAL; 3761 3762 *blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks); 3763 3764 return 0; 3765 } 3766 3767 static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg) 3768 { 3769 struct inode *inode = file_inode(filp); 3770 __u64 blocks; 3771 int ret; 3772 3773 ret = f2fs_get_compress_blocks(inode, &blocks); 3774 if (ret < 0) 3775 return ret; 3776 3777 return put_user(blocks, (u64 __user *)arg); 3778 } 3779 3780 static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) 3781 { 3782 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 3783 unsigned int released_blocks = 0; 3784 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 3785 block_t blkaddr; 3786 int i; 3787 3788 for (i = 0; i < count; i++) { 3789 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3790 dn->ofs_in_node + i); 3791 3792 if (!__is_valid_data_blkaddr(blkaddr)) 3793 continue; 3794 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, 3795 DATA_GENERIC_ENHANCE))) 3796 return -EFSCORRUPTED; 3797 } 3798 3799 while (count) { 3800 int compr_blocks = 0; 3801 3802 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { 3803 blkaddr = f2fs_data_blkaddr(dn); 3804 3805 if (i == 0) { 3806 if (blkaddr == COMPRESS_ADDR) 3807 continue; 3808 dn->ofs_in_node += cluster_size; 3809 goto next; 3810 } 3811 3812 if (__is_valid_data_blkaddr(blkaddr)) 3813 compr_blocks++; 3814 3815 if (blkaddr != NEW_ADDR) 3816 continue; 3817 3818 f2fs_set_data_blkaddr(dn, NULL_ADDR); 3819 } 3820 3821 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false); 3822 dec_valid_block_count(sbi, dn->inode, 3823 cluster_size - compr_blocks); 3824 3825 released_blocks += cluster_size - compr_blocks; 3826 next: 3827 count -= cluster_size; 3828 } 3829 3830 return released_blocks; 3831 } 3832 3833 static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) 3834 { 3835 struct inode *inode = file_inode(filp); 3836 struct f2fs_inode_info *fi = F2FS_I(inode); 3837 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3838 pgoff_t page_idx = 0, last_idx; 3839 unsigned int released_blocks = 0; 3840 int ret; 3841 int writecount; 3842 3843 if (!f2fs_sb_has_compression(sbi)) 3844 return -EOPNOTSUPP; 3845 3846 if (f2fs_readonly(sbi->sb)) 3847 return -EROFS; 3848 3849 ret = mnt_want_write_file(filp); 3850 if (ret) 
3851 return ret; 3852 3853 f2fs_balance_fs(sbi, true); 3854 3855 inode_lock(inode); 3856 3857 writecount = atomic_read(&inode->i_writecount); 3858 if ((filp->f_mode & FMODE_WRITE && writecount != 1) || 3859 (!(filp->f_mode & FMODE_WRITE) && writecount)) { 3860 ret = -EBUSY; 3861 goto out; 3862 } 3863 3864 if (!f2fs_compressed_file(inode) || 3865 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 3866 ret = -EINVAL; 3867 goto out; 3868 } 3869 3870 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 3871 if (ret) 3872 goto out; 3873 3874 if (!atomic_read(&fi->i_compr_blocks)) { 3875 ret = -EPERM; 3876 goto out; 3877 } 3878 3879 set_inode_flag(inode, FI_COMPRESS_RELEASED); 3880 inode_set_ctime_current(inode); 3881 f2fs_mark_inode_dirty_sync(inode, true); 3882 3883 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 3884 filemap_invalidate_lock(inode->i_mapping); 3885 3886 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 3887 3888 while (page_idx < last_idx) { 3889 struct dnode_of_data dn; 3890 pgoff_t end_offset, count; 3891 3892 f2fs_lock_op(sbi); 3893 3894 set_new_dnode(&dn, inode, NULL, NULL, 0); 3895 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); 3896 if (ret) { 3897 f2fs_unlock_op(sbi); 3898 if (ret == -ENOENT) { 3899 page_idx = f2fs_get_next_page_offset(&dn, 3900 page_idx); 3901 ret = 0; 3902 continue; 3903 } 3904 break; 3905 } 3906 3907 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 3908 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); 3909 count = round_up(count, fi->i_cluster_size); 3910 3911 ret = release_compress_blocks(&dn, count); 3912 3913 f2fs_put_dnode(&dn); 3914 3915 f2fs_unlock_op(sbi); 3916 3917 if (ret < 0) 3918 break; 3919 3920 page_idx += count; 3921 released_blocks += ret; 3922 } 3923 3924 filemap_invalidate_unlock(inode->i_mapping); 3925 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 3926 out: 3927 if (released_blocks) 3928 f2fs_update_time(sbi, REQ_TIME); 3929 inode_unlock(inode); 3930 3931 mnt_drop_write_file(filp); 3932 3933 if (ret >= 0) { 3934 ret = put_user(released_blocks, (u64 __user *)arg); 3935 } else if (released_blocks && 3936 atomic_read(&fi->i_compr_blocks)) { 3937 set_sbi_flag(sbi, SBI_NEED_FSCK); 3938 f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " 3939 "iblocks=%llu, released=%u, compr_blocks=%u, " 3940 "run fsck to fix.", 3941 __func__, inode->i_ino, inode->i_blocks, 3942 released_blocks, 3943 atomic_read(&fi->i_compr_blocks)); 3944 } 3945 3946 return ret; 3947 } 3948 3949 static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, 3950 unsigned int *reserved_blocks) 3951 { 3952 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 3953 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 3954 block_t blkaddr; 3955 int i; 3956 3957 for (i = 0; i < count; i++) { 3958 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3959 dn->ofs_in_node + i); 3960 3961 if (!__is_valid_data_blkaddr(blkaddr)) 3962 continue; 3963 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, 3964 DATA_GENERIC_ENHANCE))) 3965 return -EFSCORRUPTED; 3966 } 3967 3968 while (count) { 3969 int compr_blocks = 0; 3970 blkcnt_t reserved = 0; 3971 blkcnt_t to_reserved; 3972 int ret; 3973 3974 for (i = 0; i < cluster_size; i++) { 3975 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3976 dn->ofs_in_node + i); 3977 3978 if (i == 0) { 3979 if (blkaddr != COMPRESS_ADDR) { 3980 dn->ofs_in_node += cluster_size; 3981 goto next; 3982 } 3983 continue; 3984 } 3985 3986 /* 3987 * compressed cluster was not released due to it 3988 * fails in 
release_compress_blocks(), so NEW_ADDR 3989 * is a possible case. 3990 */ 3991 if (blkaddr == NEW_ADDR) { 3992 reserved++; 3993 continue; 3994 } 3995 if (__is_valid_data_blkaddr(blkaddr)) { 3996 compr_blocks++; 3997 continue; 3998 } 3999 } 4000 4001 to_reserved = cluster_size - compr_blocks - reserved; 4002 4003 /* for the case all blocks in cluster were reserved */ 4004 if (reserved && to_reserved == 1) { 4005 dn->ofs_in_node += cluster_size; 4006 goto next; 4007 } 4008 4009 ret = inc_valid_block_count(sbi, dn->inode, 4010 &to_reserved, false); 4011 if (unlikely(ret)) 4012 return ret; 4013 4014 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { 4015 if (f2fs_data_blkaddr(dn) == NULL_ADDR) 4016 f2fs_set_data_blkaddr(dn, NEW_ADDR); 4017 } 4018 4019 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); 4020 4021 *reserved_blocks += to_reserved; 4022 next: 4023 count -= cluster_size; 4024 } 4025 4026 return 0; 4027 } 4028 4029 static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) 4030 { 4031 struct inode *inode = file_inode(filp); 4032 struct f2fs_inode_info *fi = F2FS_I(inode); 4033 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4034 pgoff_t page_idx = 0, last_idx; 4035 unsigned int reserved_blocks = 0; 4036 int ret; 4037 4038 if (!f2fs_sb_has_compression(sbi)) 4039 return -EOPNOTSUPP; 4040 4041 if (f2fs_readonly(sbi->sb)) 4042 return -EROFS; 4043 4044 ret = mnt_want_write_file(filp); 4045 if (ret) 4046 return ret; 4047 4048 f2fs_balance_fs(sbi, true); 4049 4050 inode_lock(inode); 4051 4052 if (!f2fs_compressed_file(inode) || 4053 !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4054 ret = -EINVAL; 4055 goto unlock_inode; 4056 } 4057 4058 if (atomic_read(&fi->i_compr_blocks)) 4059 goto unlock_inode; 4060 4061 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 4062 filemap_invalidate_lock(inode->i_mapping); 4063 4064 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4065 4066 while (page_idx < last_idx) { 4067 struct dnode_of_data dn; 4068 pgoff_t end_offset, count; 4069 4070 f2fs_lock_op(sbi); 4071 4072 set_new_dnode(&dn, inode, NULL, NULL, 0); 4073 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); 4074 if (ret) { 4075 f2fs_unlock_op(sbi); 4076 if (ret == -ENOENT) { 4077 page_idx = f2fs_get_next_page_offset(&dn, 4078 page_idx); 4079 ret = 0; 4080 continue; 4081 } 4082 break; 4083 } 4084 4085 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 4086 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); 4087 count = round_up(count, fi->i_cluster_size); 4088 4089 ret = reserve_compress_blocks(&dn, count, &reserved_blocks); 4090 4091 f2fs_put_dnode(&dn); 4092 4093 f2fs_unlock_op(sbi); 4094 4095 if (ret < 0) 4096 break; 4097 4098 page_idx += count; 4099 } 4100 4101 filemap_invalidate_unlock(inode->i_mapping); 4102 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 4103 4104 if (!ret) { 4105 clear_inode_flag(inode, FI_COMPRESS_RELEASED); 4106 inode_set_ctime_current(inode); 4107 f2fs_mark_inode_dirty_sync(inode, true); 4108 } 4109 unlock_inode: 4110 if (reserved_blocks) 4111 f2fs_update_time(sbi, REQ_TIME); 4112 inode_unlock(inode); 4113 mnt_drop_write_file(filp); 4114 4115 if (!ret) { 4116 ret = put_user(reserved_blocks, (u64 __user *)arg); 4117 } else if (reserved_blocks && 4118 atomic_read(&fi->i_compr_blocks)) { 4119 set_sbi_flag(sbi, SBI_NEED_FSCK); 4120 f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx " 4121 "iblocks=%llu, reserved=%u, compr_blocks=%u, " 4122 "run fsck to fix.", 4123 __func__, inode->i_ino, inode->i_blocks, 4124 reserved_blocks, 
4125 atomic_read(&fi->i_compr_blocks)); 4126 } 4127 4128 return ret; 4129 } 4130 4131 static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode, 4132 pgoff_t off, block_t block, block_t len, u32 flags) 4133 { 4134 sector_t sector = SECTOR_FROM_BLOCK(block); 4135 sector_t nr_sects = SECTOR_FROM_BLOCK(len); 4136 int ret = 0; 4137 4138 if (flags & F2FS_TRIM_FILE_DISCARD) { 4139 if (bdev_max_secure_erase_sectors(bdev)) 4140 ret = blkdev_issue_secure_erase(bdev, sector, nr_sects, 4141 GFP_NOFS); 4142 else 4143 ret = blkdev_issue_discard(bdev, sector, nr_sects, 4144 GFP_NOFS); 4145 } 4146 4147 if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) { 4148 if (IS_ENCRYPTED(inode)) 4149 ret = fscrypt_zeroout_range(inode, off, block, len); 4150 else 4151 ret = blkdev_issue_zeroout(bdev, sector, nr_sects, 4152 GFP_NOFS, 0); 4153 } 4154 4155 return ret; 4156 } 4157 4158 static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) 4159 { 4160 struct inode *inode = file_inode(filp); 4161 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4162 struct address_space *mapping = inode->i_mapping; 4163 struct block_device *prev_bdev = NULL; 4164 struct f2fs_sectrim_range range; 4165 pgoff_t index, pg_end, prev_index = 0; 4166 block_t prev_block = 0, len = 0; 4167 loff_t end_addr; 4168 bool to_end = false; 4169 int ret = 0; 4170 4171 if (!(filp->f_mode & FMODE_WRITE)) 4172 return -EBADF; 4173 4174 if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg, 4175 sizeof(range))) 4176 return -EFAULT; 4177 4178 if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) || 4179 !S_ISREG(inode->i_mode)) 4180 return -EINVAL; 4181 4182 if (((range.flags & F2FS_TRIM_FILE_DISCARD) && 4183 !f2fs_hw_support_discard(sbi)) || 4184 ((range.flags & F2FS_TRIM_FILE_ZEROOUT) && 4185 IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi))) 4186 return -EOPNOTSUPP; 4187 4188 ret = mnt_want_write_file(filp); 4189 if (ret) 4190 return ret; 4191 inode_lock(inode); 4192 4193 if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) || 4194 range.start >= inode->i_size) { 4195 ret = -EINVAL; 4196 goto err; 4197 } 4198 4199 if (range.len == 0) 4200 goto err; 4201 4202 if (inode->i_size - range.start > range.len) { 4203 end_addr = range.start + range.len; 4204 } else { 4205 end_addr = range.len == (u64)-1 ? 4206 sbi->sb->s_maxbytes : inode->i_size; 4207 to_end = true; 4208 } 4209 4210 if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) || 4211 (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) { 4212 ret = -EINVAL; 4213 goto err; 4214 } 4215 4216 index = F2FS_BYTES_TO_BLK(range.start); 4217 pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE); 4218 4219 ret = f2fs_convert_inline_inode(inode); 4220 if (ret) 4221 goto err; 4222 4223 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 4224 filemap_invalidate_lock(mapping); 4225 4226 ret = filemap_write_and_wait_range(mapping, range.start, 4227 to_end ? LLONG_MAX : end_addr - 1); 4228 if (ret) 4229 goto out; 4230 4231 truncate_inode_pages_range(mapping, range.start, 4232 to_end ? 
-1 : end_addr - 1); 4233 4234 while (index < pg_end) { 4235 struct dnode_of_data dn; 4236 pgoff_t end_offset, count; 4237 int i; 4238 4239 set_new_dnode(&dn, inode, NULL, NULL, 0); 4240 ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); 4241 if (ret) { 4242 if (ret == -ENOENT) { 4243 index = f2fs_get_next_page_offset(&dn, index); 4244 continue; 4245 } 4246 goto out; 4247 } 4248 4249 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 4250 count = min(end_offset - dn.ofs_in_node, pg_end - index); 4251 for (i = 0; i < count; i++, index++, dn.ofs_in_node++) { 4252 struct block_device *cur_bdev; 4253 block_t blkaddr = f2fs_data_blkaddr(&dn); 4254 4255 if (!__is_valid_data_blkaddr(blkaddr)) 4256 continue; 4257 4258 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, 4259 DATA_GENERIC_ENHANCE)) { 4260 ret = -EFSCORRUPTED; 4261 f2fs_put_dnode(&dn); 4262 goto out; 4263 } 4264 4265 cur_bdev = f2fs_target_device(sbi, blkaddr, NULL); 4266 if (f2fs_is_multi_device(sbi)) { 4267 int di = f2fs_target_device_index(sbi, blkaddr); 4268 4269 blkaddr -= FDEV(di).start_blk; 4270 } 4271 4272 if (len) { 4273 if (prev_bdev == cur_bdev && 4274 index == prev_index + len && 4275 blkaddr == prev_block + len) { 4276 len++; 4277 } else { 4278 ret = f2fs_secure_erase(prev_bdev, 4279 inode, prev_index, prev_block, 4280 len, range.flags); 4281 if (ret) { 4282 f2fs_put_dnode(&dn); 4283 goto out; 4284 } 4285 4286 len = 0; 4287 } 4288 } 4289 4290 if (!len) { 4291 prev_bdev = cur_bdev; 4292 prev_index = index; 4293 prev_block = blkaddr; 4294 len = 1; 4295 } 4296 } 4297 4298 f2fs_put_dnode(&dn); 4299 4300 if (fatal_signal_pending(current)) { 4301 ret = -EINTR; 4302 goto out; 4303 } 4304 cond_resched(); 4305 } 4306 4307 if (len) 4308 ret = f2fs_secure_erase(prev_bdev, inode, prev_index, 4309 prev_block, len, range.flags); 4310 f2fs_update_time(sbi, REQ_TIME); 4311 out: 4312 filemap_invalidate_unlock(mapping); 4313 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 4314 err: 4315 inode_unlock(inode); 4316 mnt_drop_write_file(filp); 4317 4318 return ret; 4319 } 4320 4321 static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg) 4322 { 4323 struct inode *inode = file_inode(filp); 4324 struct f2fs_comp_option option; 4325 4326 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 4327 return -EOPNOTSUPP; 4328 4329 inode_lock_shared(inode); 4330 4331 if (!f2fs_compressed_file(inode)) { 4332 inode_unlock_shared(inode); 4333 return -ENODATA; 4334 } 4335 4336 option.algorithm = F2FS_I(inode)->i_compress_algorithm; 4337 option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size; 4338 4339 inode_unlock_shared(inode); 4340 4341 if (copy_to_user((struct f2fs_comp_option __user *)arg, &option, 4342 sizeof(option))) 4343 return -EFAULT; 4344 4345 return 0; 4346 } 4347 4348 static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) 4349 { 4350 struct inode *inode = file_inode(filp); 4351 struct f2fs_inode_info *fi = F2FS_I(inode); 4352 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4353 struct f2fs_comp_option option; 4354 int ret = 0; 4355 4356 if (!f2fs_sb_has_compression(sbi)) 4357 return -EOPNOTSUPP; 4358 4359 if (!(filp->f_mode & FMODE_WRITE)) 4360 return -EBADF; 4361 4362 if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg, 4363 sizeof(option))) 4364 return -EFAULT; 4365 4366 if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || 4367 option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || 4368 option.algorithm >= COMPRESS_MAX) 4369 return -EINVAL; 4370 4371 ret = mnt_want_write_file(filp); 4372 if (ret) 
4373 return ret; 4374 inode_lock(inode); 4375 4376 f2fs_down_write(&F2FS_I(inode)->i_sem); 4377 if (!f2fs_compressed_file(inode)) { 4378 ret = -EINVAL; 4379 goto out; 4380 } 4381 4382 if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { 4383 ret = -EBUSY; 4384 goto out; 4385 } 4386 4387 if (F2FS_HAS_BLOCKS(inode)) { 4388 ret = -EFBIG; 4389 goto out; 4390 } 4391 4392 fi->i_compress_algorithm = option.algorithm; 4393 fi->i_log_cluster_size = option.log_cluster_size; 4394 fi->i_cluster_size = BIT(option.log_cluster_size); 4395 /* Set default level */ 4396 if (fi->i_compress_algorithm == COMPRESS_ZSTD) 4397 fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; 4398 else 4399 fi->i_compress_level = 0; 4400 /* Adjust mount option level */ 4401 if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm && 4402 F2FS_OPTION(sbi).compress_level) 4403 fi->i_compress_level = F2FS_OPTION(sbi).compress_level; 4404 f2fs_mark_inode_dirty_sync(inode, true); 4405 4406 if (!f2fs_is_compress_backend_ready(inode)) 4407 f2fs_warn(sbi, "compression algorithm is successfully set, " 4408 "but current kernel doesn't support this algorithm."); 4409 out: 4410 f2fs_up_write(&fi->i_sem); 4411 inode_unlock(inode); 4412 mnt_drop_write_file(filp); 4413 4414 return ret; 4415 } 4416 4417 static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) 4418 { 4419 DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx); 4420 struct address_space *mapping = inode->i_mapping; 4421 struct folio *folio; 4422 pgoff_t redirty_idx = page_idx; 4423 int page_len = 0, ret = 0; 4424 4425 page_cache_ra_unbounded(&ractl, len, 0); 4426 4427 do { 4428 folio = read_cache_folio(mapping, page_idx, NULL, NULL); 4429 if (IS_ERR(folio)) { 4430 ret = PTR_ERR(folio); 4431 break; 4432 } 4433 page_len += folio_nr_pages(folio) - (page_idx - folio->index); 4434 page_idx = folio_next_index(folio); 4435 } while (page_len < len); 4436 4437 do { 4438 folio = filemap_lock_folio(mapping, redirty_idx); 4439 4440 /* It will never fail, when folio has pinned above */ 4441 f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(folio)); 4442 4443 f2fs_folio_wait_writeback(folio, DATA, true, true); 4444 4445 folio_mark_dirty(folio); 4446 folio_set_f2fs_gcing(folio); 4447 redirty_idx = folio_next_index(folio); 4448 folio_unlock(folio); 4449 folio_put_refs(folio, 2); 4450 } while (redirty_idx < page_idx); 4451 4452 return ret; 4453 } 4454 4455 static int f2fs_ioc_decompress_file(struct file *filp) 4456 { 4457 struct inode *inode = file_inode(filp); 4458 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4459 struct f2fs_inode_info *fi = F2FS_I(inode); 4460 pgoff_t page_idx = 0, last_idx, cluster_idx; 4461 int ret; 4462 4463 if (!f2fs_sb_has_compression(sbi) || 4464 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) 4465 return -EOPNOTSUPP; 4466 4467 if (!(filp->f_mode & FMODE_WRITE)) 4468 return -EBADF; 4469 4470 f2fs_balance_fs(sbi, true); 4471 4472 ret = mnt_want_write_file(filp); 4473 if (ret) 4474 return ret; 4475 inode_lock(inode); 4476 4477 if (!f2fs_is_compress_backend_ready(inode)) { 4478 ret = -EOPNOTSUPP; 4479 goto out; 4480 } 4481 4482 if (!f2fs_compressed_file(inode) || 4483 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4484 ret = -EINVAL; 4485 goto out; 4486 } 4487 4488 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 4489 if (ret) 4490 goto out; 4491 4492 if (!atomic_read(&fi->i_compr_blocks)) 4493 goto out; 4494 4495 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4496 last_idx >>= fi->i_log_cluster_size; 4497 4498 for 
(cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { 4499 page_idx = cluster_idx << fi->i_log_cluster_size; 4500 4501 if (!f2fs_is_compressed_cluster(inode, page_idx)) 4502 continue; 4503 4504 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); 4505 if (ret < 0) 4506 break; 4507 4508 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { 4509 ret = filemap_fdatawrite(inode->i_mapping); 4510 if (ret < 0) 4511 break; 4512 } 4513 4514 cond_resched(); 4515 if (fatal_signal_pending(current)) { 4516 ret = -EINTR; 4517 break; 4518 } 4519 } 4520 4521 if (!ret) 4522 ret = filemap_write_and_wait_range(inode->i_mapping, 0, 4523 LLONG_MAX); 4524 4525 if (ret) 4526 f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.", 4527 __func__, ret); 4528 f2fs_update_time(sbi, REQ_TIME); 4529 out: 4530 inode_unlock(inode); 4531 mnt_drop_write_file(filp); 4532 4533 return ret; 4534 } 4535 4536 static int f2fs_ioc_compress_file(struct file *filp) 4537 { 4538 struct inode *inode = file_inode(filp); 4539 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4540 struct f2fs_inode_info *fi = F2FS_I(inode); 4541 pgoff_t page_idx = 0, last_idx, cluster_idx; 4542 int ret; 4543 4544 if (!f2fs_sb_has_compression(sbi) || 4545 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) 4546 return -EOPNOTSUPP; 4547 4548 if (!(filp->f_mode & FMODE_WRITE)) 4549 return -EBADF; 4550 4551 f2fs_balance_fs(sbi, true); 4552 4553 ret = mnt_want_write_file(filp); 4554 if (ret) 4555 return ret; 4556 inode_lock(inode); 4557 4558 if (!f2fs_is_compress_backend_ready(inode)) { 4559 ret = -EOPNOTSUPP; 4560 goto out; 4561 } 4562 4563 if (!f2fs_compressed_file(inode) || 4564 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4565 ret = -EINVAL; 4566 goto out; 4567 } 4568 4569 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 4570 if (ret) 4571 goto out; 4572 4573 set_inode_flag(inode, FI_ENABLE_COMPRESS); 4574 4575 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4576 last_idx >>= fi->i_log_cluster_size; 4577 4578 for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { 4579 page_idx = cluster_idx << fi->i_log_cluster_size; 4580 4581 if (f2fs_is_sparse_cluster(inode, page_idx)) 4582 continue; 4583 4584 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); 4585 if (ret < 0) 4586 break; 4587 4588 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { 4589 ret = filemap_fdatawrite(inode->i_mapping); 4590 if (ret < 0) 4591 break; 4592 } 4593 4594 cond_resched(); 4595 if (fatal_signal_pending(current)) { 4596 ret = -EINTR; 4597 break; 4598 } 4599 } 4600 4601 if (!ret) 4602 ret = filemap_write_and_wait_range(inode->i_mapping, 0, 4603 LLONG_MAX); 4604 4605 clear_inode_flag(inode, FI_ENABLE_COMPRESS); 4606 4607 if (ret) 4608 f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). 
Please delete the file.", 4609 __func__, ret); 4610 f2fs_update_time(sbi, REQ_TIME); 4611 out: 4612 inode_unlock(inode); 4613 mnt_drop_write_file(filp); 4614 4615 return ret; 4616 } 4617 4618 static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 4619 { 4620 switch (cmd) { 4621 case FS_IOC_GETVERSION: 4622 return f2fs_ioc_getversion(filp, arg); 4623 case F2FS_IOC_START_ATOMIC_WRITE: 4624 return f2fs_ioc_start_atomic_write(filp, false); 4625 case F2FS_IOC_START_ATOMIC_REPLACE: 4626 return f2fs_ioc_start_atomic_write(filp, true); 4627 case F2FS_IOC_COMMIT_ATOMIC_WRITE: 4628 return f2fs_ioc_commit_atomic_write(filp); 4629 case F2FS_IOC_ABORT_ATOMIC_WRITE: 4630 return f2fs_ioc_abort_atomic_write(filp); 4631 case F2FS_IOC_START_VOLATILE_WRITE: 4632 case F2FS_IOC_RELEASE_VOLATILE_WRITE: 4633 return -EOPNOTSUPP; 4634 case F2FS_IOC_SHUTDOWN: 4635 return f2fs_ioc_shutdown(filp, arg); 4636 case FITRIM: 4637 return f2fs_ioc_fitrim(filp, arg); 4638 case FS_IOC_SET_ENCRYPTION_POLICY: 4639 return f2fs_ioc_set_encryption_policy(filp, arg); 4640 case FS_IOC_GET_ENCRYPTION_POLICY: 4641 return f2fs_ioc_get_encryption_policy(filp, arg); 4642 case FS_IOC_GET_ENCRYPTION_PWSALT: 4643 return f2fs_ioc_get_encryption_pwsalt(filp, arg); 4644 case FS_IOC_GET_ENCRYPTION_POLICY_EX: 4645 return f2fs_ioc_get_encryption_policy_ex(filp, arg); 4646 case FS_IOC_ADD_ENCRYPTION_KEY: 4647 return f2fs_ioc_add_encryption_key(filp, arg); 4648 case FS_IOC_REMOVE_ENCRYPTION_KEY: 4649 return f2fs_ioc_remove_encryption_key(filp, arg); 4650 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: 4651 return f2fs_ioc_remove_encryption_key_all_users(filp, arg); 4652 case FS_IOC_GET_ENCRYPTION_KEY_STATUS: 4653 return f2fs_ioc_get_encryption_key_status(filp, arg); 4654 case FS_IOC_GET_ENCRYPTION_NONCE: 4655 return f2fs_ioc_get_encryption_nonce(filp, arg); 4656 case F2FS_IOC_GARBAGE_COLLECT: 4657 return f2fs_ioc_gc(filp, arg); 4658 case F2FS_IOC_GARBAGE_COLLECT_RANGE: 4659 return f2fs_ioc_gc_range(filp, arg); 4660 case F2FS_IOC_WRITE_CHECKPOINT: 4661 return f2fs_ioc_write_checkpoint(filp); 4662 case F2FS_IOC_DEFRAGMENT: 4663 return f2fs_ioc_defragment(filp, arg); 4664 case F2FS_IOC_MOVE_RANGE: 4665 return f2fs_ioc_move_range(filp, arg); 4666 case F2FS_IOC_FLUSH_DEVICE: 4667 return f2fs_ioc_flush_device(filp, arg); 4668 case F2FS_IOC_GET_FEATURES: 4669 return f2fs_ioc_get_features(filp, arg); 4670 case F2FS_IOC_GET_PIN_FILE: 4671 return f2fs_ioc_get_pin_file(filp, arg); 4672 case F2FS_IOC_SET_PIN_FILE: 4673 return f2fs_ioc_set_pin_file(filp, arg); 4674 case F2FS_IOC_PRECACHE_EXTENTS: 4675 return f2fs_ioc_precache_extents(filp); 4676 case F2FS_IOC_RESIZE_FS: 4677 return f2fs_ioc_resize_fs(filp, arg); 4678 case FS_IOC_ENABLE_VERITY: 4679 return f2fs_ioc_enable_verity(filp, arg); 4680 case FS_IOC_MEASURE_VERITY: 4681 return f2fs_ioc_measure_verity(filp, arg); 4682 case FS_IOC_READ_VERITY_METADATA: 4683 return f2fs_ioc_read_verity_metadata(filp, arg); 4684 case FS_IOC_GETFSLABEL: 4685 return f2fs_ioc_getfslabel(filp, arg); 4686 case FS_IOC_SETFSLABEL: 4687 return f2fs_ioc_setfslabel(filp, arg); 4688 case F2FS_IOC_GET_COMPRESS_BLOCKS: 4689 return f2fs_ioc_get_compress_blocks(filp, arg); 4690 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: 4691 return f2fs_release_compress_blocks(filp, arg); 4692 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: 4693 return f2fs_reserve_compress_blocks(filp, arg); 4694 case F2FS_IOC_SEC_TRIM_FILE: 4695 return f2fs_sec_trim_file(filp, arg); 4696 case F2FS_IOC_GET_COMPRESS_OPTION: 4697 return 
f2fs_ioc_get_compress_option(filp, arg); 4698 case F2FS_IOC_SET_COMPRESS_OPTION: 4699 return f2fs_ioc_set_compress_option(filp, arg); 4700 case F2FS_IOC_DECOMPRESS_FILE: 4701 return f2fs_ioc_decompress_file(filp); 4702 case F2FS_IOC_COMPRESS_FILE: 4703 return f2fs_ioc_compress_file(filp); 4704 case F2FS_IOC_GET_DEV_ALIAS_FILE: 4705 return f2fs_ioc_get_dev_alias_file(filp, arg); 4706 case F2FS_IOC_IO_PRIO: 4707 return f2fs_ioc_io_prio(filp, arg); 4708 default: 4709 return -ENOTTY; 4710 } 4711 } 4712 4713 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 4714 { 4715 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) 4716 return -EIO; 4717 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp)))) 4718 return -ENOSPC; 4719 4720 return __f2fs_ioctl(filp, cmd, arg); 4721 } 4722 4723 /* 4724 * Return %true if the given read or write request should use direct I/O, or 4725 * %false if it should use buffered I/O. 4726 */ 4727 static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb, 4728 struct iov_iter *iter) 4729 { 4730 unsigned int align; 4731 4732 if (!(iocb->ki_flags & IOCB_DIRECT)) 4733 return false; 4734 4735 if (f2fs_force_buffered_io(inode, iov_iter_rw(iter))) 4736 return false; 4737 4738 /* 4739 * Direct I/O not aligned to the disk's logical_block_size will be 4740 * attempted, but will fail with -EINVAL. 4741 * 4742 * f2fs additionally requires that direct I/O be aligned to the 4743 * filesystem block size, which is often a stricter requirement. 4744 * However, f2fs traditionally falls back to buffered I/O on requests 4745 * that are logical_block_size-aligned but not fs-block aligned. 4746 * 4747 * The below logic implements this behavior. 4748 */ 4749 align = iocb->ki_pos | iov_iter_alignment(iter); 4750 if (!IS_ALIGNED(align, i_blocksize(inode)) && 4751 IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev))) 4752 return false; 4753 4754 return true; 4755 } 4756 4757 static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error, 4758 unsigned int flags) 4759 { 4760 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); 4761 4762 dec_page_count(sbi, F2FS_DIO_READ); 4763 if (error) 4764 return error; 4765 f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size); 4766 return 0; 4767 } 4768 4769 static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = { 4770 .end_io = f2fs_dio_read_end_io, 4771 }; 4772 4773 static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) 4774 { 4775 struct file *file = iocb->ki_filp; 4776 struct inode *inode = file_inode(file); 4777 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4778 struct f2fs_inode_info *fi = F2FS_I(inode); 4779 const loff_t pos = iocb->ki_pos; 4780 const size_t count = iov_iter_count(to); 4781 struct iomap_dio *dio; 4782 ssize_t ret; 4783 4784 if (count == 0) 4785 return 0; /* skip atime update */ 4786 4787 trace_f2fs_direct_IO_enter(inode, iocb, count, READ); 4788 4789 if (iocb->ki_flags & IOCB_NOWAIT) { 4790 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { 4791 ret = -EAGAIN; 4792 goto out; 4793 } 4794 } else { 4795 f2fs_down_read(&fi->i_gc_rwsem[READ]); 4796 } 4797 4798 /* dio is not compatible w/ atomic file */ 4799 if (f2fs_is_atomic_file(inode)) { 4800 f2fs_up_read(&fi->i_gc_rwsem[READ]); 4801 ret = -EOPNOTSUPP; 4802 goto out; 4803 } 4804 4805 /* 4806 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of 4807 * the higher-level function iomap_dio_rw() in order to ensure that the 4808 * F2FS_DIO_READ counter will be 
decremented correctly in all cases. 4809 */ 4810 inc_page_count(sbi, F2FS_DIO_READ); 4811 dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops, 4812 &f2fs_iomap_dio_read_ops, 0, NULL, 0); 4813 if (IS_ERR_OR_NULL(dio)) { 4814 ret = PTR_ERR_OR_ZERO(dio); 4815 if (ret != -EIOCBQUEUED) 4816 dec_page_count(sbi, F2FS_DIO_READ); 4817 } else { 4818 ret = iomap_dio_complete(dio); 4819 } 4820 4821 f2fs_up_read(&fi->i_gc_rwsem[READ]); 4822 4823 file_accessed(file); 4824 out: 4825 trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret); 4826 return ret; 4827 } 4828 4829 static void f2fs_trace_rw_file_path(struct file *file, loff_t pos, size_t count, 4830 int rw) 4831 { 4832 struct inode *inode = file_inode(file); 4833 char *buf, *path; 4834 4835 buf = f2fs_getname(F2FS_I_SB(inode)); 4836 if (!buf) 4837 return; 4838 path = dentry_path_raw(file_dentry(file), buf, PATH_MAX); 4839 if (IS_ERR(path)) 4840 goto free_buf; 4841 if (rw == WRITE) 4842 trace_f2fs_datawrite_start(inode, pos, count, 4843 current->pid, path, current->comm); 4844 else 4845 trace_f2fs_dataread_start(inode, pos, count, 4846 current->pid, path, current->comm); 4847 free_buf: 4848 f2fs_putname(buf); 4849 } 4850 4851 static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 4852 { 4853 struct inode *inode = file_inode(iocb->ki_filp); 4854 const loff_t pos = iocb->ki_pos; 4855 ssize_t ret; 4856 bool dio; 4857 4858 if (!f2fs_is_compress_backend_ready(inode)) 4859 return -EOPNOTSUPP; 4860 4861 if (trace_f2fs_dataread_start_enabled()) 4862 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, 4863 iov_iter_count(to), READ); 4864 4865 dio = f2fs_should_use_dio(inode, iocb, to); 4866 4867 /* In LFS mode, if there is inflight dio, wait for its completion */ 4868 if (f2fs_lfs_mode(F2FS_I_SB(inode)) && 4869 get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && 4870 (!f2fs_is_pinned_file(inode) || !dio)) 4871 inode_dio_wait(inode); 4872 4873 if (dio) { 4874 ret = f2fs_dio_read_iter(iocb, to); 4875 } else { 4876 ret = filemap_read(iocb, to, 0); 4877 if (ret > 0) 4878 f2fs_update_iostat(F2FS_I_SB(inode), inode, 4879 APP_BUFFERED_READ_IO, ret); 4880 } 4881 trace_f2fs_dataread_end(inode, pos, ret); 4882 return ret; 4883 } 4884 4885 static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos, 4886 struct pipe_inode_info *pipe, 4887 size_t len, unsigned int flags) 4888 { 4889 struct inode *inode = file_inode(in); 4890 const loff_t pos = *ppos; 4891 ssize_t ret; 4892 4893 if (!f2fs_is_compress_backend_ready(inode)) 4894 return -EOPNOTSUPP; 4895 4896 if (trace_f2fs_dataread_start_enabled()) 4897 f2fs_trace_rw_file_path(in, pos, len, READ); 4898 4899 ret = filemap_splice_read(in, ppos, pipe, len, flags); 4900 if (ret > 0) 4901 f2fs_update_iostat(F2FS_I_SB(inode), inode, 4902 APP_BUFFERED_READ_IO, ret); 4903 4904 trace_f2fs_dataread_end(inode, pos, ret); 4905 return ret; 4906 } 4907 4908 static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from) 4909 { 4910 struct file *file = iocb->ki_filp; 4911 struct inode *inode = file_inode(file); 4912 ssize_t count; 4913 int err; 4914 4915 if (IS_IMMUTABLE(inode)) 4916 return -EPERM; 4917 4918 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) 4919 return -EPERM; 4920 4921 count = generic_write_checks(iocb, from); 4922 if (count <= 0) 4923 return count; 4924 4925 err = file_modified(file); 4926 if (err) 4927 return err; 4928 4929 f2fs_zero_post_eof_page(inode, 4930 iocb->ki_pos + iov_iter_count(from), true); 4931 return count; 4932 } 4933 4934 /* 4935 * Preallocate blocks for a write 
request, if it is possible and helpful to do 4936 * so. Returns a positive number if blocks may have been preallocated, 0 if no 4937 * blocks were preallocated, or a negative errno value if something went 4938 * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the 4939 * requested blocks (not just some of them) have been allocated. 4940 */ 4941 static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, 4942 bool dio) 4943 { 4944 struct inode *inode = file_inode(iocb->ki_filp); 4945 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4946 const loff_t pos = iocb->ki_pos; 4947 const size_t count = iov_iter_count(iter); 4948 struct f2fs_map_blocks map = {}; 4949 int flag; 4950 int ret; 4951 4952 /* If it will be an out-of-place direct write, don't bother. */ 4953 if (dio && f2fs_lfs_mode(sbi)) 4954 return 0; 4955 /* 4956 * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into 4957 * buffered IO, if DIO meets any holes. 4958 */ 4959 if (dio && i_size_read(inode) && 4960 (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode)))) 4961 return 0; 4962 4963 /* No-wait I/O can't allocate blocks. */ 4964 if (iocb->ki_flags & IOCB_NOWAIT) 4965 return 0; 4966 4967 /* If it will be a short write, don't bother. */ 4968 if (fault_in_iov_iter_readable(iter, count)) 4969 return 0; 4970 4971 if (f2fs_has_inline_data(inode)) { 4972 /* If the data will fit inline, don't bother. */ 4973 if (pos + count <= MAX_INLINE_DATA(inode)) 4974 return 0; 4975 ret = f2fs_convert_inline_inode(inode); 4976 if (ret) 4977 return ret; 4978 } 4979 4980 /* Do not preallocate blocks that will be written partially in 4KB. */ 4981 map.m_lblk = F2FS_BLK_ALIGN(pos); 4982 map.m_len = F2FS_BYTES_TO_BLK(pos + count); 4983 if (map.m_len > map.m_lblk) 4984 map.m_len -= map.m_lblk; 4985 else 4986 return 0; 4987 4988 if (!IS_DEVICE_ALIASING(inode)) 4989 map.m_may_create = true; 4990 if (dio) { 4991 map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, 4992 inode->i_write_hint); 4993 flag = F2FS_GET_BLOCK_PRE_DIO; 4994 } else { 4995 map.m_seg_type = NO_CHECK_TYPE; 4996 flag = F2FS_GET_BLOCK_PRE_AIO; 4997 } 4998 4999 ret = f2fs_map_blocks(inode, &map, flag); 5000 /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. 
*/ 5001 if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0)) 5002 return ret; 5003 if (ret == 0) 5004 set_inode_flag(inode, FI_PREALLOCATED_ALL); 5005 return map.m_len; 5006 } 5007 5008 static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb, 5009 struct iov_iter *from) 5010 { 5011 struct file *file = iocb->ki_filp; 5012 struct inode *inode = file_inode(file); 5013 ssize_t ret; 5014 5015 if (iocb->ki_flags & IOCB_NOWAIT) 5016 return -EOPNOTSUPP; 5017 5018 ret = generic_perform_write(iocb, from); 5019 5020 if (ret > 0) { 5021 f2fs_update_iostat(F2FS_I_SB(inode), inode, 5022 APP_BUFFERED_IO, ret); 5023 } 5024 return ret; 5025 } 5026 5027 static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, 5028 unsigned int flags) 5029 { 5030 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); 5031 5032 dec_page_count(sbi, F2FS_DIO_WRITE); 5033 if (error) 5034 return error; 5035 f2fs_update_time(sbi, REQ_TIME); 5036 f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size); 5037 return 0; 5038 } 5039 5040 static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, 5041 struct bio *bio, loff_t file_offset) 5042 { 5043 struct inode *inode = iter->inode; 5044 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 5045 enum log_type type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint); 5046 enum temp_type temp = f2fs_get_segment_temp(sbi, type); 5047 5048 bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp); 5049 submit_bio(bio); 5050 } 5051 5052 static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = { 5053 .end_io = f2fs_dio_write_end_io, 5054 .submit_io = f2fs_dio_write_submit_io, 5055 }; 5056 5057 static void f2fs_flush_buffered_write(struct address_space *mapping, 5058 loff_t start_pos, loff_t end_pos) 5059 { 5060 int ret; 5061 5062 ret = filemap_write_and_wait_range(mapping, start_pos, end_pos); 5063 if (ret < 0) 5064 return; 5065 invalidate_mapping_pages(mapping, 5066 start_pos >> PAGE_SHIFT, 5067 end_pos >> PAGE_SHIFT); 5068 } 5069 5070 static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, 5071 bool *may_need_sync) 5072 { 5073 struct file *file = iocb->ki_filp; 5074 struct inode *inode = file_inode(file); 5075 struct f2fs_inode_info *fi = F2FS_I(inode); 5076 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 5077 const bool do_opu = f2fs_lfs_mode(sbi); 5078 const loff_t pos = iocb->ki_pos; 5079 const ssize_t count = iov_iter_count(from); 5080 unsigned int dio_flags; 5081 struct iomap_dio *dio; 5082 ssize_t ret; 5083 5084 trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE); 5085 5086 if (iocb->ki_flags & IOCB_NOWAIT) { 5087 /* f2fs_convert_inline_inode() and block allocation can block */ 5088 if (f2fs_has_inline_data(inode) || 5089 !f2fs_overwrite_io(inode, pos, count)) { 5090 ret = -EAGAIN; 5091 goto out; 5092 } 5093 5094 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) { 5095 ret = -EAGAIN; 5096 goto out; 5097 } 5098 if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { 5099 f2fs_up_read(&fi->i_gc_rwsem[WRITE]); 5100 ret = -EAGAIN; 5101 goto out; 5102 } 5103 } else { 5104 ret = f2fs_convert_inline_inode(inode); 5105 if (ret) 5106 goto out; 5107 5108 f2fs_down_read(&fi->i_gc_rwsem[WRITE]); 5109 if (do_opu) 5110 f2fs_down_read(&fi->i_gc_rwsem[READ]); 5111 } 5112 5113 /* 5114 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of 5115 * the higher-level function iomap_dio_rw() in order to ensure that the 5116 * F2FS_DIO_WRITE counter will be decremented correctly in all cases. 
5117 */ 5118 inc_page_count(sbi, F2FS_DIO_WRITE); 5119 dio_flags = 0; 5120 if (pos + count > inode->i_size) 5121 dio_flags |= IOMAP_DIO_FORCE_WAIT; 5122 dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops, 5123 &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0); 5124 if (IS_ERR_OR_NULL(dio)) { 5125 ret = PTR_ERR_OR_ZERO(dio); 5126 if (ret == -ENOTBLK) 5127 ret = 0; 5128 if (ret != -EIOCBQUEUED) 5129 dec_page_count(sbi, F2FS_DIO_WRITE); 5130 } else { 5131 ret = iomap_dio_complete(dio); 5132 } 5133 5134 if (do_opu) 5135 f2fs_up_read(&fi->i_gc_rwsem[READ]); 5136 f2fs_up_read(&fi->i_gc_rwsem[WRITE]); 5137 5138 if (ret < 0) 5139 goto out; 5140 if (pos + ret > inode->i_size) 5141 f2fs_i_size_write(inode, pos + ret); 5142 if (!do_opu) 5143 set_inode_flag(inode, FI_UPDATE_WRITE); 5144 5145 if (iov_iter_count(from)) { 5146 ssize_t ret2; 5147 loff_t bufio_start_pos = iocb->ki_pos; 5148 5149 /* 5150 * The direct write was partial, so we need to fall back to a 5151 * buffered write for the remainder. 5152 */ 5153 5154 ret2 = f2fs_buffered_write_iter(iocb, from); 5155 if (iov_iter_count(from)) 5156 f2fs_write_failed(inode, iocb->ki_pos); 5157 if (ret2 < 0) 5158 goto out; 5159 5160 /* 5161 * Ensure that the pagecache pages are written to disk and 5162 * invalidated to preserve the expected O_DIRECT semantics. 5163 */ 5164 if (ret2 > 0) { 5165 loff_t bufio_end_pos = bufio_start_pos + ret2 - 1; 5166 5167 ret += ret2; 5168 5169 f2fs_flush_buffered_write(file->f_mapping, 5170 bufio_start_pos, 5171 bufio_end_pos); 5172 } 5173 } else { 5174 /* iomap_dio_rw() already handled the generic_write_sync(). */ 5175 *may_need_sync = false; 5176 } 5177 out: 5178 trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret); 5179 return ret; 5180 } 5181 5182 static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 5183 { 5184 struct inode *inode = file_inode(iocb->ki_filp); 5185 const loff_t orig_pos = iocb->ki_pos; 5186 const size_t orig_count = iov_iter_count(from); 5187 loff_t target_size; 5188 bool dio; 5189 bool may_need_sync = true; 5190 int preallocated; 5191 const loff_t pos = iocb->ki_pos; 5192 const ssize_t count = iov_iter_count(from); 5193 ssize_t ret; 5194 5195 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { 5196 ret = -EIO; 5197 goto out; 5198 } 5199 5200 if (!f2fs_is_compress_backend_ready(inode)) { 5201 ret = -EOPNOTSUPP; 5202 goto out; 5203 } 5204 5205 if (iocb->ki_flags & IOCB_NOWAIT) { 5206 if (!inode_trylock(inode)) { 5207 ret = -EAGAIN; 5208 goto out; 5209 } 5210 } else { 5211 inode_lock(inode); 5212 } 5213 5214 if (f2fs_is_pinned_file(inode) && 5215 !f2fs_overwrite_io(inode, pos, count)) { 5216 ret = -EIO; 5217 goto out_unlock; 5218 } 5219 5220 ret = f2fs_write_checks(iocb, from); 5221 if (ret <= 0) 5222 goto out_unlock; 5223 5224 /* Determine whether we will do a direct write or a buffered write. */ 5225 dio = f2fs_should_use_dio(inode, iocb, from); 5226 5227 /* dio is not compatible w/ atomic write */ 5228 if (dio && f2fs_is_atomic_file(inode)) { 5229 ret = -EOPNOTSUPP; 5230 goto out_unlock; 5231 } 5232 5233 /* Possibly preallocate the blocks for the write. */ 5234 target_size = iocb->ki_pos + iov_iter_count(from); 5235 preallocated = f2fs_preallocate_blocks(iocb, from, dio); 5236 if (preallocated < 0) { 5237 ret = preallocated; 5238 } else { 5239 if (trace_f2fs_datawrite_start_enabled()) 5240 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, 5241 orig_count, WRITE); 5242 5243 /* Do the actual write. */ 5244 ret = dio ? 
5245 f2fs_dio_write_iter(iocb, from, &may_need_sync) : 5246 f2fs_buffered_write_iter(iocb, from); 5247 5248 trace_f2fs_datawrite_end(inode, orig_pos, ret); 5249 } 5250 5251 /* Don't leave any preallocated blocks around past i_size. */ 5252 if (preallocated && i_size_read(inode) < target_size) { 5253 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 5254 filemap_invalidate_lock(inode->i_mapping); 5255 if (!f2fs_truncate(inode)) 5256 file_dont_truncate(inode); 5257 filemap_invalidate_unlock(inode->i_mapping); 5258 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 5259 } else { 5260 file_dont_truncate(inode); 5261 } 5262 5263 clear_inode_flag(inode, FI_PREALLOCATED_ALL); 5264 out_unlock: 5265 inode_unlock(inode); 5266 out: 5267 trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret); 5268 5269 if (ret > 0 && may_need_sync) 5270 ret = generic_write_sync(iocb, ret); 5271 5272 /* If buffered IO was forced, flush and drop the data from 5273 * the page cache to preserve O_DIRECT semantics 5274 */ 5275 if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT)) 5276 f2fs_flush_buffered_write(iocb->ki_filp->f_mapping, 5277 orig_pos, 5278 orig_pos + ret - 1); 5279 5280 return ret; 5281 } 5282 5283 static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len, 5284 int advice) 5285 { 5286 struct address_space *mapping; 5287 struct backing_dev_info *bdi; 5288 struct inode *inode = file_inode(filp); 5289 int err; 5290 5291 trace_f2fs_fadvise(inode, offset, len, advice); 5292 5293 if (advice == POSIX_FADV_SEQUENTIAL) { 5294 if (S_ISFIFO(inode->i_mode)) 5295 return -ESPIPE; 5296 5297 mapping = filp->f_mapping; 5298 if (!mapping || len < 0) 5299 return -EINVAL; 5300 5301 bdi = inode_to_bdi(mapping->host); 5302 filp->f_ra.ra_pages = bdi->ra_pages * 5303 F2FS_I_SB(inode)->seq_file_ra_mul; 5304 spin_lock(&filp->f_lock); 5305 filp->f_mode &= ~FMODE_RANDOM; 5306 spin_unlock(&filp->f_lock); 5307 return 0; 5308 } else if (advice == POSIX_FADV_WILLNEED && offset == 0) { 5309 /* Load extent cache at the first readahead. 
*/ 5310 f2fs_precache_extents(inode); 5311 } 5312 5313 err = generic_fadvise(filp, offset, len, advice); 5314 if (err) 5315 return err; 5316 5317 if (advice == POSIX_FADV_DONTNEED && 5318 (test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) && 5319 f2fs_compressed_file(inode))) 5320 f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino); 5321 else if (advice == POSIX_FADV_NOREUSE) 5322 err = f2fs_keep_noreuse_range(inode, offset, len); 5323 return err; 5324 } 5325 5326 #ifdef CONFIG_COMPAT 5327 struct compat_f2fs_gc_range { 5328 u32 sync; 5329 compat_u64 start; 5330 compat_u64 len; 5331 }; 5332 #define F2FS_IOC32_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11,\ 5333 struct compat_f2fs_gc_range) 5334 5335 static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg) 5336 { 5337 struct compat_f2fs_gc_range __user *urange; 5338 struct f2fs_gc_range range; 5339 int err; 5340 5341 urange = compat_ptr(arg); 5342 err = get_user(range.sync, &urange->sync); 5343 err |= get_user(range.start, &urange->start); 5344 err |= get_user(range.len, &urange->len); 5345 if (err) 5346 return -EFAULT; 5347 5348 return __f2fs_ioc_gc_range(file, &range); 5349 } 5350 5351 struct compat_f2fs_move_range { 5352 u32 dst_fd; 5353 compat_u64 pos_in; 5354 compat_u64 pos_out; 5355 compat_u64 len; 5356 }; 5357 #define F2FS_IOC32_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ 5358 struct compat_f2fs_move_range) 5359 5360 static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg) 5361 { 5362 struct compat_f2fs_move_range __user *urange; 5363 struct f2fs_move_range range; 5364 int err; 5365 5366 urange = compat_ptr(arg); 5367 err = get_user(range.dst_fd, &urange->dst_fd); 5368 err |= get_user(range.pos_in, &urange->pos_in); 5369 err |= get_user(range.pos_out, &urange->pos_out); 5370 err |= get_user(range.len, &urange->len); 5371 if (err) 5372 return -EFAULT; 5373 5374 return __f2fs_ioc_move_range(file, &range); 5375 } 5376 5377 long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 5378 { 5379 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) 5380 return -EIO; 5381 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file)))) 5382 return -ENOSPC; 5383 5384 switch (cmd) { 5385 case FS_IOC32_GETVERSION: 5386 cmd = FS_IOC_GETVERSION; 5387 break; 5388 case F2FS_IOC32_GARBAGE_COLLECT_RANGE: 5389 return f2fs_compat_ioc_gc_range(file, arg); 5390 case F2FS_IOC32_MOVE_RANGE: 5391 return f2fs_compat_ioc_move_range(file, arg); 5392 case F2FS_IOC_START_ATOMIC_WRITE: 5393 case F2FS_IOC_START_ATOMIC_REPLACE: 5394 case F2FS_IOC_COMMIT_ATOMIC_WRITE: 5395 case F2FS_IOC_START_VOLATILE_WRITE: 5396 case F2FS_IOC_RELEASE_VOLATILE_WRITE: 5397 case F2FS_IOC_ABORT_ATOMIC_WRITE: 5398 case F2FS_IOC_SHUTDOWN: 5399 case FITRIM: 5400 case FS_IOC_SET_ENCRYPTION_POLICY: 5401 case FS_IOC_GET_ENCRYPTION_PWSALT: 5402 case FS_IOC_GET_ENCRYPTION_POLICY: 5403 case FS_IOC_GET_ENCRYPTION_POLICY_EX: 5404 case FS_IOC_ADD_ENCRYPTION_KEY: 5405 case FS_IOC_REMOVE_ENCRYPTION_KEY: 5406 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: 5407 case FS_IOC_GET_ENCRYPTION_KEY_STATUS: 5408 case FS_IOC_GET_ENCRYPTION_NONCE: 5409 case F2FS_IOC_GARBAGE_COLLECT: 5410 case F2FS_IOC_WRITE_CHECKPOINT: 5411 case F2FS_IOC_DEFRAGMENT: 5412 case F2FS_IOC_FLUSH_DEVICE: 5413 case F2FS_IOC_GET_FEATURES: 5414 case F2FS_IOC_GET_PIN_FILE: 5415 case F2FS_IOC_SET_PIN_FILE: 5416 case F2FS_IOC_PRECACHE_EXTENTS: 5417 case F2FS_IOC_RESIZE_FS: 5418 case FS_IOC_ENABLE_VERITY: 5419 case FS_IOC_MEASURE_VERITY: 5420 case FS_IOC_READ_VERITY_METADATA: 5421 case 
FS_IOC_GETFSLABEL: 5422 case FS_IOC_SETFSLABEL: 5423 case F2FS_IOC_GET_COMPRESS_BLOCKS: 5424 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: 5425 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: 5426 case F2FS_IOC_SEC_TRIM_FILE: 5427 case F2FS_IOC_GET_COMPRESS_OPTION: 5428 case F2FS_IOC_SET_COMPRESS_OPTION: 5429 case F2FS_IOC_DECOMPRESS_FILE: 5430 case F2FS_IOC_COMPRESS_FILE: 5431 case F2FS_IOC_GET_DEV_ALIAS_FILE: 5432 case F2FS_IOC_IO_PRIO: 5433 break; 5434 default: 5435 return -ENOIOCTLCMD; 5436 } 5437 return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); 5438 } 5439 #endif 5440 5441 const struct file_operations f2fs_file_operations = { 5442 .llseek = f2fs_llseek, 5443 .read_iter = f2fs_file_read_iter, 5444 .write_iter = f2fs_file_write_iter, 5445 .iopoll = iocb_bio_iopoll, 5446 .open = f2fs_file_open, 5447 .release = f2fs_release_file, 5448 .mmap_prepare = f2fs_file_mmap_prepare, 5449 .flush = f2fs_file_flush, 5450 .fsync = f2fs_sync_file, 5451 .fallocate = f2fs_fallocate, 5452 .unlocked_ioctl = f2fs_ioctl, 5453 #ifdef CONFIG_COMPAT 5454 .compat_ioctl = f2fs_compat_ioctl, 5455 #endif 5456 .splice_read = f2fs_file_splice_read, 5457 .splice_write = iter_file_splice_write, 5458 .fadvise = f2fs_file_fadvise, 5459 .fop_flags = FOP_BUFFER_RASYNC, 5460 }; 5461
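
The ioctl handlers above (e.g. f2fs_reserve_compress_blocks(), dispatched for F2FS_IOC_RESERVE_COMPRESS_BLOCKS) return their result through a u64 __user pointer passed as the ioctl argument. The userspace side is not part of file.c; the following is a minimal sketch of how the reserve ioctl might be driven, assuming the UAPI definitions installed as <linux/f2fs.h> (the kernel side includes <uapi/linux/f2fs.h>) and a file whose compressed blocks were previously released with F2FS_IOC_RELEASE_COMPRESS_BLOCKS. It is an illustration only, not part of the kernel source.

/* userspace sketch: re-reserve blocks of a released compressed file */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/f2fs.h>

int main(int argc, char **argv)
{
	__u64 blocks = 0;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/*
	 * The handler expects a compressed file with FI_COMPRESS_RELEASED
	 * set (i.e. F2FS_IOC_RELEASE_COMPRESS_BLOCKS was issued earlier);
	 * otherwise it fails with -EINVAL. On success the number of newly
	 * reserved blocks is written back through the argument pointer.
	 */
	if (ioctl(fd, F2FS_IOC_RESERVE_COMPRESS_BLOCKS, &blocks) < 0) {
		perror("F2FS_IOC_RESERVE_COMPRESS_BLOCKS");
		close(fd);
		return 1;
	}

	printf("reserved %llu blocks\n", (unsigned long long)blocks);
	close(fd);
	return 0;
}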