// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/file.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/stat.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/falloc.h>
#include <linux/types.h>
#include <linux/compat.h>
#include <linux/uaccess.h>
#include <linux/mount.h>
#include <linux/pagevec.h>
#include <linux/uio.h>
#include <linux/uuid.h>
#include <linux/file.h>
#include <linux/nls.h>
#include <linux/sched/signal.h>
#include <linux/fileattr.h>
#include <linux/fadvise.h>
#include <linux/iomap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
#include "acl.h"
#include "gc.h"
#include "iostat.h"
#include <trace/events/f2fs.h>
#include <uapi/linux/f2fs.h>

static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size)
{
	loff_t old_size = i_size_read(inode);

	if (old_size >= new_size)
		return;

	/* zero or drop pages only in range of [old_size, new_size] */
	truncate_pagecache(inode, old_size);
}

static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vmf->vma->vm_file);
	vm_flags_t flags = vmf->vma->vm_flags;
	vm_fault_t ret;

	ret = filemap_fault(vmf);
	if (ret & VM_FAULT_LOCKED)
		f2fs_update_iostat(F2FS_I_SB(inode), inode,
					APP_MAPPED_READ_IO, F2FS_BLKSIZE);

	trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret);

	return ret;
}

static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
{
	struct folio *folio = page_folio(vmf->page);
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	bool need_alloc = !f2fs_is_pinned_file(inode);
	int err = 0;
	vm_fault_t ret;

	if (unlikely(IS_IMMUTABLE(inode)))
		return VM_FAULT_SIGBUS;

	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
		err = -EIO;
		goto out;
	}

	if (unlikely(f2fs_cp_error(sbi))) {
		err = -EIO;
		goto out;
	}

	if (!f2fs_is_checkpoint_ready(sbi)) {
		err = -ENOSPC;
		goto out;
	}

	err = f2fs_convert_inline_inode(inode);
	if (err)
		goto out;

#ifdef CONFIG_F2FS_FS_COMPRESSION
	if (f2fs_compressed_file(inode)) {
		int ret = f2fs_is_compressed_cluster(inode, folio->index);

		if (ret < 0) {
			err = ret;
			goto out;
		} else if (ret) {
			need_alloc = false;
		}
	}
#endif
	/* should do out of any locked page */
	if (need_alloc)
		f2fs_balance_fs(sbi, true);

	sb_start_pagefault(inode->i_sb);

	f2fs_bug_on(sbi, f2fs_has_inline_data(inode));

	filemap_invalidate_lock(inode->i_mapping);
	f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT);
	filemap_invalidate_unlock(inode->i_mapping);

	file_update_time(vmf->vma->vm_file);
	filemap_invalidate_lock_shared(inode->i_mapping);

	folio_lock(folio);
	if (unlikely(folio->mapping != inode->i_mapping ||
			folio_pos(folio) > i_size_read(inode) ||
			!folio_test_uptodate(folio))) {
		folio_unlock(folio);
		err = -EFAULT;
		goto out_sem;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	if (need_alloc) {
		/* block allocation */
		err = f2fs_get_block_locked(&dn, folio->index);
	} else {
		err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE);
		f2fs_put_dnode(&dn);
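		/*
		 * Note: for a pinned file no block is allocated here, so the
		 * fault can only succeed if a valid on-disk block already
		 * backs this folio; otherwise fail with -EIO below.
		 */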
		if (f2fs_is_pinned_file(inode) &&
		    !__is_valid_data_blkaddr(dn.data_blkaddr))
			err = -EIO;
	}

	if (err) {
		folio_unlock(folio);
		goto out_sem;
	}

	f2fs_folio_wait_writeback(folio, DATA, false, true);

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);

	/*
	 * check to see if the page is mapped already (no holes)
	 */
	if (folio_test_mappedtodisk(folio))
		goto out_sem;

	/* page is wholly or partially inside EOF */
	if (((loff_t)(folio->index + 1) << PAGE_SHIFT) >
						i_size_read(inode)) {
		loff_t offset;

		offset = i_size_read(inode) & ~PAGE_MASK;
		folio_zero_segment(folio, offset, folio_size(folio));
	}
	folio_mark_dirty(folio);

	f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE);
	f2fs_update_time(sbi, REQ_TIME);

out_sem:
	filemap_invalidate_unlock_shared(inode->i_mapping);

	sb_end_pagefault(inode->i_sb);
out:
	ret = vmf_fs_error(err);

	trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret);
	return ret;
}

static const struct vm_operations_struct f2fs_file_vm_ops = {
	.fault		= f2fs_filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= f2fs_vm_page_mkwrite,
};

static int get_parent_ino(struct inode *inode, nid_t *pino)
{
	struct dentry *dentry;

	/*
	 * Make sure to get the non-deleted alias. The alias associated with
	 * the open file descriptor being fsync()'ed may be deleted already.
	 */
	dentry = d_find_alias(inode);
	if (!dentry)
		return 0;

	*pino = d_parent_ino(dentry);
	dput(dentry);
	return 1;
}

static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	enum cp_reason_type cp_reason = CP_NO_NEEDED;

	if (!S_ISREG(inode->i_mode))
		cp_reason = CP_NON_REGULAR;
	else if (f2fs_compressed_file(inode))
		cp_reason = CP_COMPRESSED;
	else if (inode->i_nlink != 1)
		cp_reason = CP_HARDLINK;
	else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
		cp_reason = CP_SB_NEED_CP;
	else if (file_wrong_pino(inode))
		cp_reason = CP_WRONG_PINO;
	else if (!f2fs_space_for_roll_forward(sbi))
		cp_reason = CP_NO_SPC_ROLL;
	else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
		cp_reason = CP_NODE_NEED_CP;
	else if (test_opt(sbi, FASTBOOT))
		cp_reason = CP_FASTBOOT_MODE;
	else if (F2FS_OPTION(sbi).active_logs == 2)
		cp_reason = CP_SPEC_LOG_NUM;
	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT &&
		f2fs_need_dentry_mark(sbi, inode->i_ino) &&
		f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
							TRANS_DIR_INO))
		cp_reason = CP_RECOVER_DIR;
	else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
							XATTR_DIR_INO))
		cp_reason = CP_XATTR_DIR;

	return cp_reason;
}

static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct folio *i = filemap_get_folio(NODE_MAPPING(sbi), ino);
	bool ret = false;
	/* But we need to avoid that there are some inode updates */
	if ((!IS_ERR(i) && folio_test_dirty(i)) ||
			f2fs_need_inode_block_update(sbi, ino))
		ret = true;
	f2fs_folio_put(i, false);
	return ret;
}

static void try_to_fix_pino(struct inode *inode)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	nid_t pino;

	f2fs_down_write(&fi->i_sem);
	if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
			get_parent_ino(inode, &pino)) {
		f2fs_i_pino_write(inode, pino);
		file_got_pino(inode);
	}
	f2fs_up_write(&fi->i_sem);
}

static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
						int datasync, bool atomic)
{
	struct inode *inode = file->f_mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t ino = inode->i_ino;
	int ret = 0;
	enum cp_reason_type cp_reason = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
	};
	unsigned int seq_id = 0;

	if (unlikely(f2fs_readonly(inode->i_sb)))
		return 0;

	trace_f2fs_sync_file_enter(inode);

	if (S_ISDIR(inode->i_mode))
		goto go_write;

	/* if fdatasync is triggered, let's do in-place-update */
	if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
		set_inode_flag(inode, FI_NEED_IPU);
	ret = file_write_and_wait_range(file, start, end);
	clear_inode_flag(inode, FI_NEED_IPU);

	if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
		trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
		return ret;
	}

	/* if the inode is dirty, let's recover all the time */
	if (!f2fs_skip_inode_update(inode, datasync)) {
		f2fs_write_inode(inode, NULL);
		goto go_write;
	}

	/*
	 * if there is no written data, don't waste time to write recovery info.
	 */
	if (!is_inode_flag_set(inode, FI_APPEND_WRITE) &&
			!f2fs_exist_written_data(sbi, ino, APPEND_INO)) {

		/* it may call write_inode just prior to fsync */
		if (need_inode_page_update(sbi, ino))
			goto go_write;

		if (is_inode_flag_set(inode, FI_UPDATE_WRITE) ||
				f2fs_exist_written_data(sbi, ino, UPDATE_INO))
			goto flush_out;
		goto out;
	} else {
		/*
		 * for OPU case, during fsync(), node can be persisted before
		 * data when lower device doesn't support write barrier,
		 * resulting in data corruption after SPO.
		 * So for strict fsync mode, force to use atomic write semantics
		 * to keep write order in between data/node and last node to
		 * avoid potential data corruption.
		 */
		if (F2FS_OPTION(sbi).fsync_mode ==
				FSYNC_MODE_STRICT && !atomic)
			atomic = true;
	}
go_write:
	/*
	 * Both fdatasync() and fsync() are able to be recovered from
	 * sudden power-off.
	 */
	f2fs_down_read(&F2FS_I(inode)->i_sem);
	cp_reason = need_do_checkpoint(inode);
	f2fs_up_read(&F2FS_I(inode)->i_sem);

	if (cp_reason) {
		/* all the dirty node pages should be flushed for POR */
		ret = f2fs_sync_fs(inode->i_sb, 1);

		/*
		 * We've secured consistency through sync_fs. Following pino
		 * will be used only for fsynced inodes after checkpoint.
		 */
		try_to_fix_pino(inode);
		clear_inode_flag(inode, FI_APPEND_WRITE);
		clear_inode_flag(inode, FI_UPDATE_WRITE);
		goto out;
	}
sync_nodes:
	atomic_inc(&sbi->wb_sync_req[NODE]);
	ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
	atomic_dec(&sbi->wb_sync_req[NODE]);
	if (ret)
		goto out;

	/* if cp_error was enabled, we should avoid infinite loop */
	if (unlikely(f2fs_cp_error(sbi))) {
		ret = -EIO;
		goto out;
	}

	if (f2fs_need_inode_block_update(sbi, ino)) {
		f2fs_mark_inode_dirty_sync(inode, true);
		f2fs_write_inode(inode, NULL);
		goto sync_nodes;
	}

	/*
	 * If it's atomic_write, it's just fine to keep write ordering. So
So 378 * here we don't need to wait for node write completion, since we use 379 * node chain which serializes node blocks. If one of node writes are 380 * reordered, we can see simply broken chain, resulting in stopping 381 * roll-forward recovery. It means we'll recover all or none node blocks 382 * given fsync mark. 383 */ 384 if (!atomic) { 385 ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id); 386 if (ret) 387 goto out; 388 } 389 390 /* once recovery info is written, don't need to tack this */ 391 f2fs_remove_ino_entry(sbi, ino, APPEND_INO); 392 clear_inode_flag(inode, FI_APPEND_WRITE); 393 flush_out: 394 if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) 395 ret = f2fs_issue_flush(sbi, inode->i_ino); 396 if (!ret) { 397 f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); 398 clear_inode_flag(inode, FI_UPDATE_WRITE); 399 f2fs_remove_ino_entry(sbi, ino, FLUSH_INO); 400 } 401 f2fs_update_time(sbi, REQ_TIME); 402 out: 403 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); 404 return ret; 405 } 406 407 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 408 { 409 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) 410 return -EIO; 411 return f2fs_do_sync_file(file, start, end, datasync, false); 412 } 413 414 static bool __found_offset(struct address_space *mapping, 415 struct dnode_of_data *dn, pgoff_t index, int whence) 416 { 417 block_t blkaddr = f2fs_data_blkaddr(dn); 418 struct inode *inode = mapping->host; 419 bool compressed_cluster = false; 420 421 if (f2fs_compressed_file(inode)) { 422 block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_folio, 423 ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size)); 424 425 compressed_cluster = first_blkaddr == COMPRESS_ADDR; 426 } 427 428 switch (whence) { 429 case SEEK_DATA: 430 if (__is_valid_data_blkaddr(blkaddr)) 431 return true; 432 if (blkaddr == NEW_ADDR && 433 xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY)) 434 return true; 435 if (compressed_cluster) 436 return true; 437 break; 438 case SEEK_HOLE: 439 if (compressed_cluster) 440 return false; 441 if (blkaddr == NULL_ADDR) 442 return true; 443 break; 444 } 445 return false; 446 } 447 448 static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) 449 { 450 struct inode *inode = file->f_mapping->host; 451 loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 452 struct dnode_of_data dn; 453 pgoff_t pgofs, end_offset; 454 loff_t data_ofs = offset; 455 loff_t isize; 456 int err = 0; 457 458 inode_lock_shared(inode); 459 460 isize = i_size_read(inode); 461 if (offset >= isize) 462 goto fail; 463 464 /* handle inline data case */ 465 if (f2fs_has_inline_data(inode)) { 466 if (whence == SEEK_HOLE) { 467 data_ofs = isize; 468 goto found; 469 } else if (whence == SEEK_DATA) { 470 data_ofs = offset; 471 goto found; 472 } 473 } 474 475 pgofs = (pgoff_t)(offset >> PAGE_SHIFT); 476 477 for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) { 478 set_new_dnode(&dn, inode, NULL, NULL, 0); 479 err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); 480 if (err && err != -ENOENT) { 481 goto fail; 482 } else if (err == -ENOENT) { 483 /* direct node does not exists */ 484 if (whence == SEEK_DATA) { 485 pgofs = f2fs_get_next_page_offset(&dn, pgofs); 486 continue; 487 } else { 488 goto found; 489 } 490 } 491 492 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 493 494 /* find data/hole in dnode block */ 495 for (; dn.ofs_in_node < end_offset; 496 dn.ofs_in_node++, pgofs++, 497 data_ofs = 
			block_t blkaddr;

			blkaddr = f2fs_data_blkaddr(&dn);

			if (__is_valid_data_blkaddr(blkaddr) &&
				!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
					blkaddr, DATA_GENERIC_ENHANCE)) {
				f2fs_put_dnode(&dn);
				goto fail;
			}

			if (__found_offset(file->f_mapping, &dn,
							pgofs, whence)) {
				f2fs_put_dnode(&dn);
				goto found;
			}
		}
		f2fs_put_dnode(&dn);
	}

	if (whence == SEEK_DATA)
		goto fail;
found:
	if (whence == SEEK_HOLE && data_ofs > isize)
		data_ofs = isize;
	inode_unlock_shared(inode);
	return vfs_setpos(file, data_ofs, maxbytes);
fail:
	inode_unlock_shared(inode);
	return -ENXIO;
}

static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));

	switch (whence) {
	case SEEK_SET:
	case SEEK_CUR:
	case SEEK_END:
		return generic_file_llseek_size(file, offset, whence,
						maxbytes, i_size_read(inode));
	case SEEK_DATA:
	case SEEK_HOLE:
		if (offset < 0)
			return -ENXIO;
		return f2fs_seek_block(file, offset, whence);
	}

	return -EINVAL;
}

static int f2fs_file_mmap_prepare(struct vm_area_desc *desc)
{
	struct file *file = desc->file;
	struct inode *inode = file_inode(file);

	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
		return -EIO;

	if (!f2fs_is_compress_backend_ready(inode))
		return -EOPNOTSUPP;

	file_accessed(file);
	desc->vm_ops = &f2fs_file_vm_ops;

	f2fs_down_read(&F2FS_I(inode)->i_sem);
	set_inode_flag(inode, FI_MMAP_FILE);
	f2fs_up_read(&F2FS_I(inode)->i_sem);

	return 0;
}

static int finish_preallocate_blocks(struct inode *inode)
{
	int ret = 0;
	bool opened;

	f2fs_down_read(&F2FS_I(inode)->i_sem);
	opened = is_inode_flag_set(inode, FI_OPENED_FILE);
	f2fs_up_read(&F2FS_I(inode)->i_sem);
	if (opened)
		return 0;

	inode_lock(inode);
	if (is_inode_flag_set(inode, FI_OPENED_FILE))
		goto out_unlock;

	if (!file_should_truncate(inode))
		goto out_update;

	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(inode->i_mapping);

	truncate_setsize(inode, i_size_read(inode));
	ret = f2fs_truncate(inode);

	filemap_invalidate_unlock(inode->i_mapping);
	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	if (ret)
		goto out_unlock;

	file_dont_truncate(inode);
out_update:
	f2fs_down_write(&F2FS_I(inode)->i_sem);
	set_inode_flag(inode, FI_OPENED_FILE);
	f2fs_up_write(&F2FS_I(inode)->i_sem);
out_unlock:
	inode_unlock(inode);
	return ret;
}

static int f2fs_file_open(struct inode *inode, struct file *filp)
{
	int err = fscrypt_file_open(inode, filp);

	if (err)
		return err;

	if (!f2fs_is_compress_backend_ready(inode))
		return -EOPNOTSUPP;

	err = fsverity_file_open(inode, filp);
	if (err)
		return err;

	filp->f_mode |= FMODE_NOWAIT;
	filp->f_mode |= FMODE_CAN_ODIRECT;

	err = dquot_file_open(inode, filp);
	if (err)
		return err;

	err = finish_preallocate_blocks(inode);
	if (!err)
		atomic_inc(&F2FS_I(inode)->open_count);
	return err;
}

void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int nr_free = 0, ofs = dn->ofs_in_node, len = count;
	__le32 *addr;
	bool compressed_cluster = false;
	int cluster_index = 0, valid_blocks = 0;
	int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
	bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks);
	block_t blkstart;
	int blklen = 0;

	addr = get_dnode_addr(dn->inode, dn->node_folio) + ofs;
	blkstart = le32_to_cpu(*addr);

	/* Assumption: truncation starts with cluster */
	for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) {
		block_t blkaddr = le32_to_cpu(*addr);

		if (f2fs_compressed_file(dn->inode) &&
					!(cluster_index & (cluster_size - 1))) {
			if (compressed_cluster)
				f2fs_i_compr_blocks_update(dn->inode,
							valid_blocks, false);
			compressed_cluster = (blkaddr == COMPRESS_ADDR);
			valid_blocks = 0;
		}

		if (blkaddr == NULL_ADDR)
			goto next;

		f2fs_set_data_blkaddr(dn, NULL_ADDR);

		if (__is_valid_data_blkaddr(blkaddr)) {
			if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
				goto next;
			if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
						DATA_GENERIC_ENHANCE))
				goto next;
			if (compressed_cluster)
				valid_blocks++;
		}

		if (blkstart + blklen == blkaddr) {
			blklen++;
		} else {
			f2fs_invalidate_blocks(sbi, blkstart, blklen);
			blkstart = blkaddr;
			blklen = 1;
		}

		if (!released || blkaddr != COMPRESS_ADDR)
			nr_free++;

		continue;

next:
		if (blklen)
			f2fs_invalidate_blocks(sbi, blkstart, blklen);

		blkstart = le32_to_cpu(*(addr + 1));
		blklen = 0;
	}

	if (blklen)
		f2fs_invalidate_blocks(sbi, blkstart, blklen);

	if (compressed_cluster)
		f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false);

	if (nr_free) {
		pgoff_t fofs;
		/*
		 * once we invalidate valid blkaddr in range [ofs, ofs + count],
		 * we will invalidate all blkaddr in the whole range.
		 */
		fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio),
							dn->inode) + ofs;
		f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
		f2fs_update_age_extent_cache_range(dn, fofs, len);
		dec_valid_block_count(sbi, dn->inode, nr_free);
	}
	dn->ofs_in_node = ofs;

	f2fs_update_time(sbi, REQ_TIME);
	trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
					dn->ofs_in_node, nr_free);
}

static int truncate_partial_data_page(struct inode *inode, u64 from,
								bool cache_only)
{
	loff_t offset = from & (PAGE_SIZE - 1);
	pgoff_t index = from >> PAGE_SHIFT;
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;

	if (!offset && !cache_only)
		return 0;

	if (cache_only) {
		folio = filemap_lock_folio(mapping, index);
		if (IS_ERR(folio))
			return 0;
		if (folio_test_uptodate(folio))
			goto truncate_out;
		f2fs_folio_put(folio, true);
		return 0;
	}

	folio = f2fs_get_lock_data_folio(inode, index, true);
	if (IS_ERR(folio))
		return PTR_ERR(folio) == -ENOENT ? 0 : PTR_ERR(folio);
truncate_out:
	f2fs_folio_wait_writeback(folio, DATA, true, true);
	folio_zero_segment(folio, offset, folio_size(folio));

	/* An encrypted inode should have a key and truncate the last page. */
	f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode));
	if (!cache_only)
		folio_mark_dirty(folio);
	f2fs_folio_put(folio, true);
	return 0;
}

int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	pgoff_t free_from;
	int count = 0, err = 0;
	struct folio *ifolio;
	bool truncate_page = false;

	trace_f2fs_truncate_blocks_enter(inode, from);

	if (IS_DEVICE_ALIASING(inode) && from) {
		err = -EINVAL;
		goto out_err;
	}

	free_from = (pgoff_t)F2FS_BLK_ALIGN(from);

	if (free_from >= max_file_blocks(inode))
		goto free_partial;

	if (lock)
		f2fs_lock_op(sbi);

	ifolio = f2fs_get_inode_folio(sbi, inode->i_ino);
	if (IS_ERR(ifolio)) {
		err = PTR_ERR(ifolio);
		goto out;
	}

	if (IS_DEVICE_ALIASING(inode)) {
		struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
		struct extent_info ei = et->largest;

		f2fs_invalidate_blocks(sbi, ei.blk, ei.len);

		dec_valid_block_count(sbi, inode, ei.len);
		f2fs_update_time(sbi, REQ_TIME);

		f2fs_folio_put(ifolio, true);
		goto out;
	}

	if (f2fs_has_inline_data(inode)) {
		f2fs_truncate_inline_inode(inode, ifolio, from);
		f2fs_folio_put(ifolio, true);
		truncate_page = true;
		goto out;
	}

	set_new_dnode(&dn, inode, ifolio, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
	if (err) {
		if (err == -ENOENT)
			goto free_next;
		goto out;
	}

	count = ADDRS_PER_PAGE(dn.node_folio, inode);

	count -= dn.ofs_in_node;
	f2fs_bug_on(sbi, count < 0);

	if (dn.ofs_in_node || IS_INODE(dn.node_folio)) {
		f2fs_truncate_data_blocks_range(&dn, count);
		free_from += count;
	}

	f2fs_put_dnode(&dn);
free_next:
	err = f2fs_truncate_inode_blocks(inode, free_from);
out:
	if (lock)
		f2fs_unlock_op(sbi);
free_partial:
	/* lastly zero out the first data page */
	if (!err)
		err = truncate_partial_data_page(inode, from, truncate_page);
out_err:
	trace_f2fs_truncate_blocks_exit(inode, err);
	return err;
}

int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
	u64 free_from = from;
	int err;

#ifdef CONFIG_F2FS_FS_COMPRESSION
	/*
	 * for compressed file, only support cluster size
	 * aligned truncation.
	 */
	if (f2fs_compressed_file(inode))
		free_from = round_up(from,
				F2FS_I(inode)->i_cluster_size << PAGE_SHIFT);
#endif

	err = f2fs_do_truncate_blocks(inode, free_from, lock);
	if (err)
		return err;

#ifdef CONFIG_F2FS_FS_COMPRESSION
	/*
	 * For compressed file, after releasing compress blocks, don't allow
	 * direct write, but we should allow direct write after truncating to zero.
	 */
	if (f2fs_compressed_file(inode) && !free_from
			&& is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
		clear_inode_flag(inode, FI_COMPRESS_RELEASED);

	if (from != free_from) {
		err = f2fs_truncate_partial_cluster(inode, from, lock);
		if (err)
			return err;
	}
#endif

	return 0;
}

int f2fs_truncate(struct inode *inode)
{
	int err;

	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
		return -EIO;

	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
				S_ISLNK(inode->i_mode)))
		return 0;

	trace_f2fs_truncate(inode);

	if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE))
		return -EIO;

	err = f2fs_dquot_initialize(inode);
	if (err)
		return err;

	/* we should check inline_data size */
	if (!f2fs_may_inline_data(inode)) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}

	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
	if (err)
		return err;

	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	f2fs_mark_inode_dirty_sync(inode, false);
	return 0;
}

static bool f2fs_force_buffered_io(struct inode *inode, int rw)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (!fscrypt_dio_supported(inode))
		return true;
	if (fsverity_active(inode))
		return true;
	if (f2fs_compressed_file(inode))
		return true;
	/*
	 * only force direct read to use buffered IO, for direct write,
	 * it expects inline data conversion before committing IO.
	 */
	if (f2fs_has_inline_data(inode) && rw == READ)
		return true;

	/* disallow direct IO if any of devices has unaligned blksize */
	if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
		return true;
	/*
	 * for blkzoned device, fall back from direct IO to buffered IO, so
	 * all IOs can be serialized by log-structured write.
	 */
	if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) &&
			!f2fs_is_pinned_file(inode))
		return true;
	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
		return true;

	return false;
}

int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path,
		 struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
	struct inode *inode = d_inode(path->dentry);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_inode *ri = NULL;
	unsigned int flags;

	if (f2fs_has_extra_attr(inode) &&
			f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
			F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
		stat->result_mask |= STATX_BTIME;
		stat->btime.tv_sec = fi->i_crtime.tv_sec;
		stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
	}

	/*
	 * Return the DIO alignment restrictions if requested. We only return
	 * this information when requested, since on encrypted files it might
	 * take a fair bit of work to get if the file wasn't opened recently.
	 *
	 * f2fs sometimes supports DIO reads but not DIO writes. STATX_DIOALIGN
	 * cannot represent that, so in that case we report no DIO support.
	 */
	if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
		unsigned int bsize = i_blocksize(inode);

		stat->result_mask |= STATX_DIOALIGN;
		if (!f2fs_force_buffered_io(inode, WRITE)) {
			stat->dio_mem_align = bsize;
			stat->dio_offset_align = bsize;
		}
	}

	flags = fi->i_flags;
	if (flags & F2FS_COMPR_FL)
		stat->attributes |= STATX_ATTR_COMPRESSED;
	if (flags & F2FS_APPEND_FL)
		stat->attributes |= STATX_ATTR_APPEND;
	if (IS_ENCRYPTED(inode))
		stat->attributes |= STATX_ATTR_ENCRYPTED;
	if (flags & F2FS_IMMUTABLE_FL)
		stat->attributes |= STATX_ATTR_IMMUTABLE;
	if (flags & F2FS_NODUMP_FL)
		stat->attributes |= STATX_ATTR_NODUMP;
	if (IS_VERITY(inode))
		stat->attributes |= STATX_ATTR_VERITY;

	stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
				  STATX_ATTR_APPEND |
				  STATX_ATTR_ENCRYPTED |
				  STATX_ATTR_IMMUTABLE |
				  STATX_ATTR_NODUMP |
				  STATX_ATTR_VERITY);

	generic_fillattr(idmap, request_mask, inode, stat);

	/* we need to show initial sectors used for inline_data/dentries */
	if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
					f2fs_has_inline_dentry(inode))
		stat->blocks += (stat->size + 511) >> 9;

	return 0;
}

#ifdef CONFIG_F2FS_FS_POSIX_ACL
static void __setattr_copy(struct mnt_idmap *idmap,
			   struct inode *inode, const struct iattr *attr)
{
	unsigned int ia_valid = attr->ia_valid;

	i_uid_update(idmap, attr, inode);
	i_gid_update(idmap, attr, inode);
	if (ia_valid & ATTR_ATIME)
		inode_set_atime_to_ts(inode, attr->ia_atime);
	if (ia_valid & ATTR_MTIME)
		inode_set_mtime_to_ts(inode, attr->ia_mtime);
	if (ia_valid & ATTR_CTIME)
		inode_set_ctime_to_ts(inode, attr->ia_ctime);
	if (ia_valid & ATTR_MODE) {
		umode_t mode = attr->ia_mode;

		if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
			mode &= ~S_ISGID;
		set_acl_inode(inode, mode);
	}
}
#else
#define __setattr_copy setattr_copy
#endif

int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
		 struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int err;

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	err = setattr_prepare(idmap, dentry, attr);
	if (err)
		return err;

	err = fscrypt_prepare_setattr(dentry, attr);
	if (err)
		return err;

	err = fsverity_prepare_setattr(dentry, attr);
	if (err)
		return err;

	if (unlikely(IS_IMMUTABLE(inode)))
		return -EPERM;

	if (unlikely(IS_APPEND(inode) &&
			(attr->ia_valid & (ATTR_MODE | ATTR_UID |
					   ATTR_GID | ATTR_TIMES_SET))))
		return -EPERM;

	if ((attr->ia_valid & ATTR_SIZE)) {
		if (!f2fs_is_compress_backend_ready(inode) ||
				IS_DEVICE_ALIASING(inode))
			return -EOPNOTSUPP;
		if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) &&
			!IS_ALIGNED(attr->ia_size,
				F2FS_BLK_TO_BYTES(fi->i_cluster_size)))
			return -EINVAL;
		/*
		 * To prevent scattered pin block generation, we don't allow
		 * smaller/equal size unaligned truncation for pinned file.
		 * We only support overwrite IO to pinned file, so don't
		 * care about larger size truncation.
		 */
		if (f2fs_is_pinned_file(inode) &&
			attr->ia_size <= i_size_read(inode) &&
			!IS_ALIGNED(attr->ia_size,
				F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi))))
			return -EINVAL;
	}

	if (is_quota_modification(idmap, inode, attr)) {
		err = f2fs_dquot_initialize(inode);
		if (err)
			return err;
	}
	if (i_uid_needs_update(idmap, attr, inode) ||
	    i_gid_needs_update(idmap, attr, inode)) {
		f2fs_lock_op(sbi);
		err = dquot_transfer(idmap, inode, attr);
		if (err) {
			set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
			f2fs_unlock_op(sbi);
			return err;
		}
		/*
		 * update uid/gid under lock_op(), so that dquot and inode can
		 * be updated atomically.
		 */
		i_uid_update(idmap, attr, inode);
		i_gid_update(idmap, attr, inode);
		f2fs_mark_inode_dirty_sync(inode, true);
		f2fs_unlock_op(sbi);
	}

	if (attr->ia_valid & ATTR_SIZE) {
		loff_t old_size = i_size_read(inode);

		if (attr->ia_size > MAX_INLINE_DATA(inode)) {
			/*
			 * should convert inline inode before i_size_write to
			 * keep smaller than inline_data size with inline flag.
			 */
			err = f2fs_convert_inline_inode(inode);
			if (err)
				return err;
		}

		/*
		 * wait for inflight dio, blocks should be removed after
		 * IO completion.
		 */
		if (attr->ia_size < old_size)
			inode_dio_wait(inode);

		f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
		filemap_invalidate_lock(inode->i_mapping);

		if (attr->ia_size > old_size)
			f2fs_zero_post_eof_page(inode, attr->ia_size);
		truncate_setsize(inode, attr->ia_size);

		if (attr->ia_size <= old_size)
			err = f2fs_truncate(inode);
		/*
		 * do not trim all blocks after i_size if target size is
		 * larger than i_size.
		 */
		filemap_invalidate_unlock(inode->i_mapping);
		f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
		if (err)
			return err;

		spin_lock(&fi->i_size_lock);
		inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
		fi->last_disk_size = i_size_read(inode);
		spin_unlock(&fi->i_size_lock);
	}

	__setattr_copy(idmap, inode, attr);

	if (attr->ia_valid & ATTR_MODE) {
		err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode));

		if (is_inode_flag_set(inode, FI_ACL_MODE)) {
			if (!err)
				inode->i_mode = fi->i_acl_mode;
			clear_inode_flag(inode, FI_ACL_MODE);
		}
	}

	/* file size may have changed here */
	f2fs_mark_inode_dirty_sync(inode, true);

	/* inode change will produce dirty node pages flushed by checkpoint */
	f2fs_balance_fs(sbi, true);

	return err;
}

const struct inode_operations f2fs_file_inode_operations = {
	.getattr	= f2fs_getattr,
	.setattr	= f2fs_setattr,
	.get_inode_acl	= f2fs_get_acl,
	.set_acl	= f2fs_set_acl,
	.listxattr	= f2fs_listxattr,
	.fiemap		= f2fs_fiemap,
	.fileattr_get	= f2fs_fileattr_get,
	.fileattr_set	= f2fs_fileattr_set,
};

static int fill_zero(struct inode *inode, pgoff_t index,
					loff_t start, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct folio *folio;

	if (!len)
		return 0;

	f2fs_balance_fs(sbi, true);

	f2fs_lock_op(sbi);
	folio = f2fs_get_new_data_folio(inode, NULL, index, false);
	f2fs_unlock_op(sbi);

	if (IS_ERR(folio))
		return PTR_ERR(folio);

	f2fs_folio_wait_writeback(folio, DATA, true, true);
	folio_zero_range(folio, start, len);
	folio_mark_dirty(folio);
	f2fs_folio_put(folio, true);
	return 0;
}

int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
{
	int err;

	while (pg_start < pg_end) {
		struct dnode_of_data dn;
		pgoff_t end_offset, count;

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
		if (err) {
			if (err == -ENOENT) {
				pg_start = f2fs_get_next_page_offset(&dn,
								pg_start);
				continue;
			}
			return err;
		}

		end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
		count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);

		f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);

		f2fs_truncate_data_blocks_range(&dn, count);
		f2fs_put_dnode(&dn);

		pg_start += count;
	}
	return 0;
}

static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	pgoff_t pg_start, pg_end;
	loff_t off_start, off_end;
	int ret;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	filemap_invalidate_lock(inode->i_mapping);
	f2fs_zero_post_eof_page(inode, offset + len);
	filemap_invalidate_unlock(inode->i_mapping);

	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;

	off_start = offset & (PAGE_SIZE - 1);
	off_end = (offset + len) & (PAGE_SIZE - 1);

	if (pg_start == pg_end) {
		ret = fill_zero(inode, pg_start, off_start,
						off_end - off_start);
		if (ret)
			return ret;
	} else {
		if (off_start) {
			ret = fill_zero(inode, pg_start++, off_start,
						PAGE_SIZE - off_start);
			if (ret)
				return ret;
		}
		if (off_end) {
			ret = fill_zero(inode, pg_end, 0, off_end);
			if (ret)
				return ret;
		}

		if (pg_start < pg_end) {
			loff_t blk_start, blk_end;
			struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

			f2fs_balance_fs(sbi, true);

			blk_start = (loff_t)pg_start << PAGE_SHIFT;
			blk_end = (loff_t)pg_end << PAGE_SHIFT;

			f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
			filemap_invalidate_lock(inode->i_mapping);

			truncate_pagecache_range(inode, blk_start, blk_end - 1);

			f2fs_lock_op(sbi);
			ret = f2fs_truncate_hole(inode, pg_start, pg_end);
			f2fs_unlock_op(sbi);

			filemap_invalidate_unlock(inode->i_mapping);
			f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		}
	}

	return ret;
}

static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr,
				int *do_replace, pgoff_t off, pgoff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	int ret, done, i;

next_dnode:
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
	if (ret && ret != -ENOENT) {
		return ret;
	} else if (ret == -ENOENT) {
		if (dn.max_level == 0)
			return -ENOENT;
		done = min((pgoff_t)ADDRS_PER_BLOCK(inode) -
						dn.ofs_in_node, len);
		blkaddr += done;
		do_replace += done;
		goto next;
	}

	done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) -
						dn.ofs_in_node, len);
	for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
		*blkaddr = f2fs_data_blkaddr(&dn);

		if (__is_valid_data_blkaddr(*blkaddr) &&
			!f2fs_is_valid_blkaddr(sbi, *blkaddr,
					DATA_GENERIC_ENHANCE)) {
			f2fs_put_dnode(&dn);
			return -EFSCORRUPTED;
		}

		if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {

			if (f2fs_lfs_mode(sbi)) {
				f2fs_put_dnode(&dn);
				return -EOPNOTSUPP;
			}

			/* do not invalidate this block address */
			f2fs_update_data_blkaddr(&dn, NULL_ADDR);
			*do_replace = 1;
		}
	}
	f2fs_put_dnode(&dn);
next:
	len -= done;
	off += done;
	if (len)
		goto next_dnode;
	return 0;
}

static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr,
				int *do_replace, pgoff_t off, int len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	int ret, i;

	for (i = 0; i < len; i++, do_replace++, blkaddr++) {
		if (*do_replace == 0)
			continue;

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
		if (ret) {
			dec_valid_block_count(sbi, inode, 1);
			f2fs_invalidate_blocks(sbi, *blkaddr, 1);
		} else {
			f2fs_update_data_blkaddr(&dn, *blkaddr);
		}
		f2fs_put_dnode(&dn);
	}
	return 0;
}

static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
			block_t *blkaddr, int *do_replace,
			pgoff_t src, pgoff_t dst, pgoff_t len, bool full)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode);
	pgoff_t i = 0;
	int ret;

	while (i < len) {
		if (blkaddr[i] == NULL_ADDR && !full) {
			i++;
			continue;
		}

		if (do_replace[i] || blkaddr[i] == NULL_ADDR) {
			struct dnode_of_data dn;
			struct node_info ni;
			size_t new_size;
			pgoff_t ilen;

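			/*
			 * Grab (allocating if needed) the destination dnode,
			 * then move the source block addresses into it below.
			 */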
			set_new_dnode(&dn, dst_inode, NULL, NULL, 0);
			ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
			if (ret)
				return ret;

			ret = f2fs_get_node_info(sbi, dn.nid, &ni, false);
			if (ret) {
				f2fs_put_dnode(&dn);
				return ret;
			}

			ilen = min((pgoff_t)
				ADDRS_PER_PAGE(dn.node_folio, dst_inode) -
						dn.ofs_in_node, len - i);
			do {
				dn.data_blkaddr = f2fs_data_blkaddr(&dn);
				f2fs_truncate_data_blocks_range(&dn, 1);

				if (do_replace[i]) {
					f2fs_i_blocks_write(src_inode,
							1, false, false);
					f2fs_i_blocks_write(dst_inode,
							1, true, false);
					f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
					blkaddr[i], ni.version, true, false);

					do_replace[i] = 0;
				}
				dn.ofs_in_node++;
				i++;
				new_size = (loff_t)(dst + i) << PAGE_SHIFT;
				if (dst_inode->i_size < new_size)
					f2fs_i_size_write(dst_inode, new_size);
			} while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));

			f2fs_put_dnode(&dn);
		} else {
			struct folio *fsrc, *fdst;

			fsrc = f2fs_get_lock_data_folio(src_inode,
							src + i, true);
			if (IS_ERR(fsrc))
				return PTR_ERR(fsrc);
			fdst = f2fs_get_new_data_folio(dst_inode, NULL, dst + i,
							true);
			if (IS_ERR(fdst)) {
				f2fs_folio_put(fsrc, true);
				return PTR_ERR(fdst);
			}

			f2fs_folio_wait_writeback(fdst, DATA, true, true);

			memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE);
			folio_mark_dirty(fdst);
			folio_set_f2fs_gcing(fdst);
			f2fs_folio_put(fdst, true);
			f2fs_folio_put(fsrc, true);

			ret = f2fs_truncate_hole(src_inode,
						src + i, src + i + 1);
			if (ret)
				return ret;
			i++;
		}
	}
	return 0;
}

static int __exchange_data_block(struct inode *src_inode,
			struct inode *dst_inode, pgoff_t src, pgoff_t dst,
			pgoff_t len, bool full)
{
	block_t *src_blkaddr;
	int *do_replace;
	pgoff_t olen;
	int ret;

	while (len) {
		olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len);

		src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
					array_size(olen, sizeof(block_t)),
					GFP_NOFS);
		if (!src_blkaddr)
			return -ENOMEM;

		do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode),
					array_size(olen, sizeof(int)),
					GFP_NOFS);
		if (!do_replace) {
			kvfree(src_blkaddr);
			return -ENOMEM;
		}

		ret = __read_out_blkaddrs(src_inode, src_blkaddr,
					do_replace, src, olen);
		if (ret)
			goto roll_back;

		ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr,
					do_replace, src, dst, olen, full);
		if (ret)
			goto roll_back;

		src += olen;
		dst += olen;
		len -= olen;

		kvfree(src_blkaddr);
		kvfree(do_replace);
	}
	return 0;

roll_back:
	__roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen);
	kvfree(src_blkaddr);
	kvfree(do_replace);
	return ret;
}

static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	pgoff_t start = offset >> PAGE_SHIFT;
	pgoff_t end = (offset + len) >> PAGE_SHIFT;
	int ret;

	f2fs_balance_fs(sbi, true);

	/* avoid gc operation during block exchange */
	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(inode->i_mapping);

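	/* Drop any page cache beyond i_size before the block exchange below. */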
	f2fs_zero_post_eof_page(inode, offset + len);

	f2fs_lock_op(sbi);
	f2fs_drop_extent_tree(inode);
	truncate_pagecache(inode, offset);
	ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
	f2fs_unlock_op(sbi);

	filemap_invalidate_unlock(inode->i_mapping);
	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	return ret;
}

static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
	loff_t new_size;
	int ret;

	if (offset + len >= i_size_read(inode))
		return -EINVAL;

	/* collapse range should be aligned to block size of f2fs. */
	if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
		return -EINVAL;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	if (ret)
		return ret;

	ret = f2fs_do_collapse(inode, offset, len);
	if (ret)
		return ret;

	/* write out all moved pages, if possible */
	filemap_invalidate_lock(inode->i_mapping);
	filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	truncate_pagecache(inode, offset);

	new_size = i_size_read(inode) - len;
	ret = f2fs_truncate_blocks(inode, new_size, true);
	filemap_invalidate_unlock(inode->i_mapping);
	if (!ret)
		f2fs_i_size_write(inode, new_size);
	return ret;
}

static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
								pgoff_t end)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	pgoff_t index = start;
	unsigned int ofs_in_node = dn->ofs_in_node;
	blkcnt_t count = 0;
	int ret;

	for (; index < end; index++, dn->ofs_in_node++) {
		if (f2fs_data_blkaddr(dn) == NULL_ADDR)
			count++;
	}

	dn->ofs_in_node = ofs_in_node;
	ret = f2fs_reserve_new_blocks(dn, count);
	if (ret)
		return ret;

	dn->ofs_in_node = ofs_in_node;
	for (index = start; index < end; index++, dn->ofs_in_node++) {
		dn->data_blkaddr = f2fs_data_blkaddr(dn);
		/*
		 * f2fs_reserve_new_blocks will not guarantee entire block
		 * allocation.
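		 * Any block still NULL_ADDR after the reservation therefore
		 * means we ran out of space; bail out with -ENOSPC below.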
		 */
		if (dn->data_blkaddr == NULL_ADDR) {
			ret = -ENOSPC;
			break;
		}

		if (dn->data_blkaddr == NEW_ADDR)
			continue;

		if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr,
					DATA_GENERIC_ENHANCE)) {
			ret = -EFSCORRUPTED;
			break;
		}

		f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1);
		f2fs_set_data_blkaddr(dn, NEW_ADDR);
	}

	f2fs_update_read_extent_cache_range(dn, start, 0, index - start);
	f2fs_update_age_extent_cache_range(dn, start, index - start);

	return ret;
}

static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
								int mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct address_space *mapping = inode->i_mapping;
	pgoff_t index, pg_start, pg_end;
	loff_t new_size = i_size_read(inode);
	loff_t off_start, off_end;
	int ret = 0;

	ret = inode_newsize_ok(inode, (len + offset));
	if (ret)
		return ret;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
	if (ret)
		return ret;

	filemap_invalidate_lock(mapping);
	f2fs_zero_post_eof_page(inode, offset + len);
	filemap_invalidate_unlock(mapping);

	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;

	off_start = offset & (PAGE_SIZE - 1);
	off_end = (offset + len) & (PAGE_SIZE - 1);

	if (pg_start == pg_end) {
		ret = fill_zero(inode, pg_start, off_start,
						off_end - off_start);
		if (ret)
			return ret;

		new_size = max_t(loff_t, new_size, offset + len);
	} else {
		if (off_start) {
			ret = fill_zero(inode, pg_start++, off_start,
						PAGE_SIZE - off_start);
			if (ret)
				return ret;

			new_size = max_t(loff_t, new_size,
					(loff_t)pg_start << PAGE_SHIFT);
		}

		for (index = pg_start; index < pg_end;) {
			struct dnode_of_data dn;
			unsigned int end_offset;
			pgoff_t end;

			f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
			filemap_invalidate_lock(mapping);

			truncate_pagecache_range(inode,
				(loff_t)index << PAGE_SHIFT,
				((loff_t)pg_end << PAGE_SHIFT) - 1);

			f2fs_lock_op(sbi);

			set_new_dnode(&dn, inode, NULL, NULL, 0);
			ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
			if (ret) {
				f2fs_unlock_op(sbi);
				filemap_invalidate_unlock(mapping);
				f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
				goto out;
			}

			end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
			end = min(pg_end, end_offset - dn.ofs_in_node + index);

			ret = f2fs_do_zero_range(&dn, index, end);
			f2fs_put_dnode(&dn);

			f2fs_unlock_op(sbi);
			filemap_invalidate_unlock(mapping);
			f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

			f2fs_balance_fs(sbi, dn.node_changed);

			if (ret)
				goto out;

			index = end;
			new_size = max_t(loff_t, new_size,
					(loff_t)index << PAGE_SHIFT);
		}

		if (off_end) {
			ret = fill_zero(inode, pg_end, 0, off_end);
			if (ret)
				goto out;

			new_size = max_t(loff_t, new_size, offset + len);
		}
	}

out:
	if (new_size > i_size_read(inode)) {
		if (mode & FALLOC_FL_KEEP_SIZE)
			file_set_keep_isize(inode);
		else
			f2fs_i_size_write(inode, new_size);
	}
	return ret;
}

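/*
 * FALLOC_FL_INSERT_RANGE: shift the blocks at @offset upward by @len bytes,
 * leaving a block-aligned hole at @offset and growing i_size by @len.
 */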
static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct address_space *mapping = inode->i_mapping;
	pgoff_t nr, pg_start, pg_end, delta, idx;
	loff_t new_size;
	int ret = 0;

	new_size = i_size_read(inode) + len;
	ret = inode_newsize_ok(inode, new_size);
	if (ret)
		return ret;

	if (offset >= i_size_read(inode))
		return -EINVAL;

	/* insert range should be aligned to block size of f2fs. */
	if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
		return -EINVAL;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	f2fs_balance_fs(sbi, true);

	filemap_invalidate_lock(mapping);
	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
	filemap_invalidate_unlock(mapping);
	if (ret)
		return ret;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
	if (ret)
		return ret;

	pg_start = offset >> PAGE_SHIFT;
	pg_end = (offset + len) >> PAGE_SHIFT;
	delta = pg_end - pg_start;
	idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	/* avoid gc operation during block exchange */
	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(mapping);

	f2fs_zero_post_eof_page(inode, offset + len);
	truncate_pagecache(inode, offset);

	while (!ret && idx > pg_start) {
		nr = idx - pg_start;
		if (nr > delta)
			nr = delta;
		idx -= nr;

		f2fs_lock_op(sbi);
		f2fs_drop_extent_tree(inode);

		ret = __exchange_data_block(inode, inode, idx,
					idx + delta, nr, false);
		f2fs_unlock_op(sbi);
	}
	filemap_invalidate_unlock(mapping);
	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	if (ret)
		return ret;

	/* write out all moved pages, if possible */
	filemap_invalidate_lock(mapping);
	ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
	truncate_pagecache(inode, offset);
	filemap_invalidate_unlock(mapping);

	if (!ret)
		f2fs_i_size_write(inode, new_size);
	return ret;
}

static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
					loff_t len, int mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
			.m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
			.m_may_create = true };
	struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
			.init_gc_type = FG_GC,
			.should_migrate_blocks = false,
			.err_gc_skipped = true,
			.nr_free_secs = 0 };
	pgoff_t pg_start, pg_end;
	loff_t new_size;
	loff_t off_end;
	block_t expanded = 0;
	int err;

	err = inode_newsize_ok(inode, (len + offset));
	if (err)
		return err;

	err = f2fs_convert_inline_inode(inode);
	if (err)
		return err;

	filemap_invalidate_lock(inode->i_mapping);
	f2fs_zero_post_eof_page(inode, offset + len);
	filemap_invalidate_unlock(inode->i_mapping);

	f2fs_balance_fs(sbi, true);

	pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
	pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
	off_end = (offset + len) & (PAGE_SIZE - 1);

	map.m_lblk = pg_start;
	map.m_len = pg_end - pg_start;
	if (off_end)
		map.m_len++;

	if (!map.m_len)
		return 0;

	if (f2fs_is_pinned_file(inode)) {
		block_t sec_blks = CAP_BLKS_PER_SEC(sbi);
		block_t sec_len = roundup(map.m_len, sec_blks);

		map.m_len = sec_blks;
next_alloc:
		f2fs_down_write(&sbi->pin_sem);

		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
			if (has_not_enough_free_secs(sbi, 0, 0)) {
				f2fs_up_write(&sbi->pin_sem);
				err = -ENOSPC;
				f2fs_warn_ratelimited(sbi,
					"ino:%lu, start:%lu, end:%lu, need to trigger GC to "
					"reclaim enough free segment when checkpoint is enabled",
					inode->i_ino, pg_start, pg_end);
				goto out_err;
			}
		}

		if (has_not_enough_free_secs(sbi, 0,
					sbi->reserved_pin_section)) {
			f2fs_down_write(&sbi->gc_lock);
			stat_inc_gc_call_count(sbi, FOREGROUND);
			err = f2fs_gc(sbi, &gc_control);
			if (err && err != -ENODATA) {
				f2fs_up_write(&sbi->pin_sem);
				goto out_err;
			}
		}

		err = f2fs_allocate_pinning_section(sbi);
		if (err) {
			f2fs_up_write(&sbi->pin_sem);
			goto out_err;
		}

		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
		file_dont_truncate(inode);

		f2fs_up_write(&sbi->pin_sem);

		expanded += map.m_len;
		sec_len -= map.m_len;
		map.m_lblk += map.m_len;
		if (!err && sec_len)
			goto next_alloc;

		map.m_len = expanded;
	} else {
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO);
		expanded = map.m_len;
	}
out_err:
	if (err) {
		pgoff_t last_off;

		if (!expanded)
			return err;

		last_off = pg_start + expanded - 1;

		/* update new size to the failed position */
		new_size = (last_off == pg_end) ? offset + len :
					(loff_t)(last_off + 1) << PAGE_SHIFT;
	} else {
		new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
	}

	if (new_size > i_size_read(inode)) {
		if (mode & FALLOC_FL_KEEP_SIZE)
			file_set_keep_isize(inode);
		else
			f2fs_i_size_write(inode, new_size);
	}

	return err;
}

static long f2fs_fallocate(struct file *file, int mode,
				loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	long ret = 0;

	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
		return -EIO;
	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
		return -ENOSPC;
	if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode))
		return -EOPNOTSUPP;

	/* f2fs only supports ->fallocate for regular files */
	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (IS_ENCRYPTED(inode) &&
		(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
		return -EOPNOTSUPP;

	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
			FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
			FALLOC_FL_INSERT_RANGE))
		return -EOPNOTSUPP;

	inode_lock(inode);

	/*
	 * Pinned file should not support partial truncation since the block
	 * can be used by applications.
	 */
	if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) &&
		(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
			FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	ret = file_modified(file);
	if (ret)
		goto out;

	/*
	 * wait for inflight dio, blocks should be removed after IO
	 * completion.
	 */
	inode_dio_wait(inode);

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		if (offset >= inode->i_size)
			goto out;

		ret = f2fs_punch_hole(inode, offset, len);
	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
		ret = f2fs_collapse_range(inode, offset, len);
	} else if (mode & FALLOC_FL_ZERO_RANGE) {
		ret = f2fs_zero_range(inode, offset, len, mode);
	} else if (mode & FALLOC_FL_INSERT_RANGE) {
		ret = f2fs_insert_range(inode, offset, len);
	} else {
		ret = f2fs_expand_inode_data(inode, offset, len, mode);
	}

	if (!ret) {
		inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
		f2fs_mark_inode_dirty_sync(inode, false);
		f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	}

out:
	inode_unlock(inode);

	trace_f2fs_fallocate(inode, mode, offset, len, ret);
	return ret;
}

static int f2fs_release_file(struct inode *inode, struct file *filp)
{
	if (atomic_dec_and_test(&F2FS_I(inode)->open_count))
		f2fs_remove_donate_inode(inode);

	/*
	 * f2fs_release_file() is called on every close call. So we should
	 * not drop any in-memory pages by a close called by another process.
	 */
	if (!(filp->f_mode & FMODE_WRITE) ||
			atomic_read(&inode->i_writecount) != 1)
		return 0;

	inode_lock(inode);
	f2fs_abort_atomic_write(inode, true);
	inode_unlock(inode);

	return 0;
}

static int f2fs_file_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);

	/*
	 * If the process doing a transaction has crashed, we should do a
	 * roll-back. Otherwise, other readers/writers can see a corrupted
	 * database until all the writers close the file. Since this should
	 * be done before dropping the file lock, it needs to be done in
	 * ->flush().
	 */
	if (F2FS_I(inode)->atomic_write_task == current &&
			(current->flags & PF_EXITING)) {
		inode_lock(inode);
		f2fs_abort_atomic_write(inode, true);
		inode_unlock(inode);
	}

	return 0;
}

static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	u32 masked_flags = fi->i_flags & mask;

	/* mask can be shrunk by flags_valid selector */
	iflags &= mask;

	/* Is it quota file? Do not allow user to mess with it */
Do not allow user to mess with it */ 2091 if (IS_NOQUOTA(inode)) 2092 return -EPERM; 2093 2094 if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) { 2095 if (!f2fs_sb_has_casefold(F2FS_I_SB(inode))) 2096 return -EOPNOTSUPP; 2097 if (!f2fs_empty_dir(inode)) 2098 return -ENOTEMPTY; 2099 } 2100 2101 if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) { 2102 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 2103 return -EOPNOTSUPP; 2104 if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL)) 2105 return -EINVAL; 2106 } 2107 2108 if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { 2109 if (masked_flags & F2FS_COMPR_FL) { 2110 if (!f2fs_disable_compressed_file(inode)) 2111 return -EINVAL; 2112 } else { 2113 /* try to convert inline_data to support compression */ 2114 int err = f2fs_convert_inline_inode(inode); 2115 if (err) 2116 return err; 2117 2118 f2fs_down_write(&fi->i_sem); 2119 if (!f2fs_may_compress(inode) || 2120 (S_ISREG(inode->i_mode) && 2121 F2FS_HAS_BLOCKS(inode))) { 2122 f2fs_up_write(&fi->i_sem); 2123 return -EINVAL; 2124 } 2125 err = set_compress_context(inode); 2126 f2fs_up_write(&fi->i_sem); 2127 2128 if (err) 2129 return err; 2130 } 2131 } 2132 2133 fi->i_flags = iflags | (fi->i_flags & ~mask); 2134 f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) && 2135 (fi->i_flags & F2FS_NOCOMP_FL)); 2136 2137 if (fi->i_flags & F2FS_PROJINHERIT_FL) 2138 set_inode_flag(inode, FI_PROJ_INHERIT); 2139 else 2140 clear_inode_flag(inode, FI_PROJ_INHERIT); 2141 2142 inode_set_ctime_current(inode); 2143 f2fs_set_inode_flags(inode); 2144 f2fs_mark_inode_dirty_sync(inode, true); 2145 return 0; 2146 } 2147 2148 /* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */ 2149 2150 /* 2151 * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry 2152 * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to 2153 * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add 2154 * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL. 2155 * 2156 * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and 2157 * FS_IOC_FSSETXATTR is done by the VFS. 
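* For example, F2FS_CASEFOLD_FL is paired with FS_CASEFOLD_FL in f2fs_fsflags_map[] below, and FS_CASEFOLD_FL is present in both F2FS_GETTABLE_FS_FL and F2FS_SETTABLE_FS_FL, so the casefold flag is both gettable and settable via these ioctls.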
2158 */ 2159 2160 static const struct { 2161 u32 iflag; 2162 u32 fsflag; 2163 } f2fs_fsflags_map[] = { 2164 { F2FS_COMPR_FL, FS_COMPR_FL }, 2165 { F2FS_SYNC_FL, FS_SYNC_FL }, 2166 { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, 2167 { F2FS_APPEND_FL, FS_APPEND_FL }, 2168 { F2FS_NODUMP_FL, FS_NODUMP_FL }, 2169 { F2FS_NOATIME_FL, FS_NOATIME_FL }, 2170 { F2FS_NOCOMP_FL, FS_NOCOMP_FL }, 2171 { F2FS_INDEX_FL, FS_INDEX_FL }, 2172 { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, 2173 { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, 2174 { F2FS_CASEFOLD_FL, FS_CASEFOLD_FL }, 2175 }; 2176 2177 #define F2FS_GETTABLE_FS_FL ( \ 2178 FS_COMPR_FL | \ 2179 FS_SYNC_FL | \ 2180 FS_IMMUTABLE_FL | \ 2181 FS_APPEND_FL | \ 2182 FS_NODUMP_FL | \ 2183 FS_NOATIME_FL | \ 2184 FS_NOCOMP_FL | \ 2185 FS_INDEX_FL | \ 2186 FS_DIRSYNC_FL | \ 2187 FS_PROJINHERIT_FL | \ 2188 FS_ENCRYPT_FL | \ 2189 FS_INLINE_DATA_FL | \ 2190 FS_NOCOW_FL | \ 2191 FS_VERITY_FL | \ 2192 FS_CASEFOLD_FL) 2193 2194 #define F2FS_SETTABLE_FS_FL ( \ 2195 FS_COMPR_FL | \ 2196 FS_SYNC_FL | \ 2197 FS_IMMUTABLE_FL | \ 2198 FS_APPEND_FL | \ 2199 FS_NODUMP_FL | \ 2200 FS_NOATIME_FL | \ 2201 FS_NOCOMP_FL | \ 2202 FS_DIRSYNC_FL | \ 2203 FS_PROJINHERIT_FL | \ 2204 FS_CASEFOLD_FL) 2205 2206 /* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */ 2207 static inline u32 f2fs_iflags_to_fsflags(u32 iflags) 2208 { 2209 u32 fsflags = 0; 2210 int i; 2211 2212 for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) 2213 if (iflags & f2fs_fsflags_map[i].iflag) 2214 fsflags |= f2fs_fsflags_map[i].fsflag; 2215 2216 return fsflags; 2217 } 2218 2219 /* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */ 2220 static inline u32 f2fs_fsflags_to_iflags(u32 fsflags) 2221 { 2222 u32 iflags = 0; 2223 int i; 2224 2225 for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) 2226 if (fsflags & f2fs_fsflags_map[i].fsflag) 2227 iflags |= f2fs_fsflags_map[i].iflag; 2228 2229 return iflags; 2230 } 2231 2232 static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) 2233 { 2234 struct inode *inode = file_inode(filp); 2235 2236 return put_user(inode->i_generation, (int __user *)arg); 2237 } 2238 2239 static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) 2240 { 2241 struct inode *inode = file_inode(filp); 2242 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2243 struct f2fs_inode_info *fi = F2FS_I(inode); 2244 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2245 loff_t isize; 2246 int ret; 2247 2248 if (!(filp->f_mode & FMODE_WRITE)) 2249 return -EBADF; 2250 2251 if (!inode_owner_or_capable(idmap, inode)) 2252 return -EACCES; 2253 2254 if (!S_ISREG(inode->i_mode)) 2255 return -EINVAL; 2256 2257 if (filp->f_flags & O_DIRECT) 2258 return -EINVAL; 2259 2260 ret = mnt_want_write_file(filp); 2261 if (ret) 2262 return ret; 2263 2264 inode_lock(inode); 2265 2266 if (!f2fs_disable_compressed_file(inode) || 2267 f2fs_is_pinned_file(inode)) { 2268 ret = -EINVAL; 2269 goto out; 2270 } 2271 2272 if (f2fs_is_atomic_file(inode)) 2273 goto out; 2274 2275 ret = f2fs_convert_inline_inode(inode); 2276 if (ret) 2277 goto out; 2278 2279 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 2280 f2fs_down_write(&fi->i_gc_rwsem[READ]); 2281 2282 /* 2283 * Should wait end_io to count F2FS_WB_CP_DATA correctly by 2284 * f2fs_is_atomic_file. 
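* The filemap_write_and_wait_range() call below provides that wait before the inode is flagged with FI_ATOMIC_FILE.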
2285 */ 2286 if (get_dirty_pages(inode)) 2287 f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u", 2288 inode->i_ino, get_dirty_pages(inode)); 2289 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 2290 if (ret) 2291 goto out_unlock; 2292 2293 /* Check if the inode already has a COW inode */ 2294 if (fi->cow_inode == NULL) { 2295 /* Create a COW inode for atomic write */ 2296 struct dentry *dentry = file_dentry(filp); 2297 struct inode *dir = d_inode(dentry->d_parent); 2298 2299 ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode); 2300 if (ret) 2301 goto out_unlock; 2302 2303 set_inode_flag(fi->cow_inode, FI_COW_FILE); 2304 clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); 2305 2306 /* Set the COW inode's atomic_inode to the atomic inode */ 2307 F2FS_I(fi->cow_inode)->atomic_inode = inode; 2308 } else { 2309 /* Reuse the already created COW inode */ 2310 f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode)); 2311 2312 invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1); 2313 2314 ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); 2315 if (ret) 2316 goto out_unlock; 2317 } 2318 2319 f2fs_write_inode(inode, NULL); 2320 2321 stat_inc_atomic_inode(inode); 2322 2323 set_inode_flag(inode, FI_ATOMIC_FILE); 2324 2325 isize = i_size_read(inode); 2326 fi->original_i_size = isize; 2327 if (truncate) { 2328 set_inode_flag(inode, FI_ATOMIC_REPLACE); 2329 truncate_inode_pages_final(inode->i_mapping); 2330 f2fs_i_size_write(inode, 0); 2331 isize = 0; 2332 } 2333 f2fs_i_size_write(fi->cow_inode, isize); 2334 2335 out_unlock: 2336 f2fs_up_write(&fi->i_gc_rwsem[READ]); 2337 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 2338 if (ret) 2339 goto out; 2340 2341 f2fs_update_time(sbi, REQ_TIME); 2342 fi->atomic_write_task = current; 2343 stat_update_max_atomic_write(inode); 2344 fi->atomic_write_cnt = 0; 2345 out: 2346 inode_unlock(inode); 2347 mnt_drop_write_file(filp); 2348 return ret; 2349 } 2350 2351 static int f2fs_ioc_commit_atomic_write(struct file *filp) 2352 { 2353 struct inode *inode = file_inode(filp); 2354 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2355 int ret; 2356 2357 if (!(filp->f_mode & FMODE_WRITE)) 2358 return -EBADF; 2359 2360 if (!inode_owner_or_capable(idmap, inode)) 2361 return -EACCES; 2362 2363 ret = mnt_want_write_file(filp); 2364 if (ret) 2365 return ret; 2366 2367 f2fs_balance_fs(F2FS_I_SB(inode), true); 2368 2369 inode_lock(inode); 2370 2371 if (f2fs_is_atomic_file(inode)) { 2372 ret = f2fs_commit_atomic_write(inode); 2373 if (!ret) 2374 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); 2375 2376 f2fs_abort_atomic_write(inode, ret); 2377 } else { 2378 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); 2379 } 2380 2381 inode_unlock(inode); 2382 mnt_drop_write_file(filp); 2383 return ret; 2384 } 2385 2386 static int f2fs_ioc_abort_atomic_write(struct file *filp) 2387 { 2388 struct inode *inode = file_inode(filp); 2389 struct mnt_idmap *idmap = file_mnt_idmap(filp); 2390 int ret; 2391 2392 if (!(filp->f_mode & FMODE_WRITE)) 2393 return -EBADF; 2394 2395 if (!inode_owner_or_capable(idmap, inode)) 2396 return -EACCES; 2397 2398 ret = mnt_want_write_file(filp); 2399 if (ret) 2400 return ret; 2401 2402 inode_lock(inode); 2403 2404 f2fs_abort_atomic_write(inode, true); 2405 2406 inode_unlock(inode); 2407 2408 mnt_drop_write_file(filp); 2409 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2410 return ret; 2411 } 2412 2413 int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, 2414 bool readonly, bool need_lock) 2415 { 2416 struct 
super_block *sb = sbi->sb; 2417 int ret = 0; 2418 2419 switch (flag) { 2420 case F2FS_GOING_DOWN_FULLSYNC: 2421 ret = bdev_freeze(sb->s_bdev); 2422 if (ret) 2423 goto out; 2424 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2425 bdev_thaw(sb->s_bdev); 2426 break; 2427 case F2FS_GOING_DOWN_METASYNC: 2428 /* do checkpoint only */ 2429 ret = f2fs_sync_fs(sb, 1); 2430 if (ret) { 2431 if (ret == -EIO) 2432 ret = 0; 2433 goto out; 2434 } 2435 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2436 break; 2437 case F2FS_GOING_DOWN_NOSYNC: 2438 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2439 break; 2440 case F2FS_GOING_DOWN_METAFLUSH: 2441 f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); 2442 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); 2443 break; 2444 case F2FS_GOING_DOWN_NEED_FSCK: 2445 set_sbi_flag(sbi, SBI_NEED_FSCK); 2446 set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); 2447 set_sbi_flag(sbi, SBI_IS_DIRTY); 2448 /* do checkpoint only */ 2449 ret = f2fs_sync_fs(sb, 1); 2450 if (ret == -EIO) 2451 ret = 0; 2452 goto out; 2453 default: 2454 ret = -EINVAL; 2455 goto out; 2456 } 2457 2458 if (readonly) 2459 goto out; 2460 2461 /* 2462 * grab sb->s_umount to avoid racing w/ remount() and other shutdown 2463 * paths. 2464 */ 2465 if (need_lock) 2466 down_write(&sbi->sb->s_umount); 2467 2468 f2fs_stop_gc_thread(sbi); 2469 f2fs_stop_discard_thread(sbi); 2470 2471 f2fs_drop_discard_cmd(sbi); 2472 clear_opt(sbi, DISCARD); 2473 2474 if (need_lock) 2475 up_write(&sbi->sb->s_umount); 2476 2477 f2fs_update_time(sbi, REQ_TIME); 2478 out: 2479 2480 trace_f2fs_shutdown(sbi, flag, ret); 2481 2482 return ret; 2483 } 2484 2485 static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) 2486 { 2487 struct inode *inode = file_inode(filp); 2488 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2489 __u32 in; 2490 int ret; 2491 bool need_drop = false, readonly = false; 2492 2493 if (!capable(CAP_SYS_ADMIN)) 2494 return -EPERM; 2495 2496 if (get_user(in, (__u32 __user *)arg)) 2497 return -EFAULT; 2498 2499 if (in != F2FS_GOING_DOWN_FULLSYNC) { 2500 ret = mnt_want_write_file(filp); 2501 if (ret) { 2502 if (ret != -EROFS) 2503 return ret; 2504 2505 /* fallback to nosync shutdown for readonly fs */ 2506 in = F2FS_GOING_DOWN_NOSYNC; 2507 readonly = true; 2508 } else { 2509 need_drop = true; 2510 } 2511 } 2512 2513 ret = f2fs_do_shutdown(sbi, in, readonly, true); 2514 2515 if (need_drop) 2516 mnt_drop_write_file(filp); 2517 2518 return ret; 2519 } 2520 2521 static int f2fs_keep_noreuse_range(struct inode *inode, 2522 loff_t offset, loff_t len) 2523 { 2524 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2525 u64 max_bytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); 2526 u64 start, end; 2527 int ret = 0; 2528 2529 if (!S_ISREG(inode->i_mode)) 2530 return 0; 2531 2532 if (offset >= max_bytes || len > max_bytes || 2533 (offset + len) > max_bytes) 2534 return 0; 2535 2536 start = offset >> PAGE_SHIFT; 2537 end = DIV_ROUND_UP(offset + len, PAGE_SIZE); 2538 2539 inode_lock(inode); 2540 if (f2fs_is_atomic_file(inode)) { 2541 inode_unlock(inode); 2542 return 0; 2543 } 2544 2545 spin_lock(&sbi->inode_lock[DONATE_INODE]); 2546 /* let's remove the range, if len = 0 */ 2547 if (!len) { 2548 if (!list_empty(&F2FS_I(inode)->gdonate_list)) { 2549 list_del_init(&F2FS_I(inode)->gdonate_list); 2550 sbi->donate_files--; 2551 if (is_inode_flag_set(inode, FI_DONATE_FINISHED)) 2552 ret = -EALREADY; 2553 else 2554 set_inode_flag(inode, FI_DONATE_FINISHED); 2555 } else 2556 ret = -ENOENT; 2557 } 
else { 2558 if (list_empty(&F2FS_I(inode)->gdonate_list)) { 2559 list_add_tail(&F2FS_I(inode)->gdonate_list, 2560 &sbi->inode_list[DONATE_INODE]); 2561 sbi->donate_files++; 2562 } else { 2563 list_move_tail(&F2FS_I(inode)->gdonate_list, 2564 &sbi->inode_list[DONATE_INODE]); 2565 } 2566 F2FS_I(inode)->donate_start = start; 2567 F2FS_I(inode)->donate_end = end - 1; 2568 clear_inode_flag(inode, FI_DONATE_FINISHED); 2569 } 2570 spin_unlock(&sbi->inode_lock[DONATE_INODE]); 2571 inode_unlock(inode); 2572 2573 return ret; 2574 } 2575 2576 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) 2577 { 2578 struct inode *inode = file_inode(filp); 2579 struct super_block *sb = inode->i_sb; 2580 struct fstrim_range range; 2581 int ret; 2582 2583 if (!capable(CAP_SYS_ADMIN)) 2584 return -EPERM; 2585 2586 if (!f2fs_hw_support_discard(F2FS_SB(sb))) 2587 return -EOPNOTSUPP; 2588 2589 if (copy_from_user(&range, (struct fstrim_range __user *)arg, 2590 sizeof(range))) 2591 return -EFAULT; 2592 2593 ret = mnt_want_write_file(filp); 2594 if (ret) 2595 return ret; 2596 2597 range.minlen = max((unsigned int)range.minlen, 2598 bdev_discard_granularity(sb->s_bdev)); 2599 ret = f2fs_trim_fs(F2FS_SB(sb), &range); 2600 mnt_drop_write_file(filp); 2601 if (ret < 0) 2602 return ret; 2603 2604 if (copy_to_user((struct fstrim_range __user *)arg, &range, 2605 sizeof(range))) 2606 return -EFAULT; 2607 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2608 return 0; 2609 } 2610 2611 static bool uuid_is_nonzero(__u8 u[16]) 2612 { 2613 int i; 2614 2615 for (i = 0; i < 16; i++) 2616 if (u[i]) 2617 return true; 2618 return false; 2619 } 2620 2621 static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) 2622 { 2623 struct inode *inode = file_inode(filp); 2624 int ret; 2625 2626 if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode))) 2627 return -EOPNOTSUPP; 2628 2629 ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg); 2630 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2631 return ret; 2632 } 2633 2634 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) 2635 { 2636 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2637 return -EOPNOTSUPP; 2638 return fscrypt_ioctl_get_policy(filp, (void __user *)arg); 2639 } 2640 2641 static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) 2642 { 2643 struct inode *inode = file_inode(filp); 2644 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2645 u8 encrypt_pw_salt[16]; 2646 int err; 2647 2648 if (!f2fs_sb_has_encrypt(sbi)) 2649 return -EOPNOTSUPP; 2650 2651 err = mnt_want_write_file(filp); 2652 if (err) 2653 return err; 2654 2655 f2fs_down_write(&sbi->sb_lock); 2656 2657 if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) 2658 goto got_it; 2659 2660 /* update superblock with uuid */ 2661 generate_random_uuid(sbi->raw_super->encrypt_pw_salt); 2662 2663 err = f2fs_commit_super(sbi, false); 2664 if (err) { 2665 /* undo new data */ 2666 memset(sbi->raw_super->encrypt_pw_salt, 0, 16); 2667 goto out_err; 2668 } 2669 got_it: 2670 memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16); 2671 out_err: 2672 f2fs_up_write(&sbi->sb_lock); 2673 mnt_drop_write_file(filp); 2674 2675 if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16)) 2676 err = -EFAULT; 2677 2678 return err; 2679 } 2680 2681 static int f2fs_ioc_get_encryption_policy_ex(struct file *filp, 2682 unsigned long arg) 2683 { 2684 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2685 return -EOPNOTSUPP; 2686 2687 return 
fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg); 2688 } 2689 2690 static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg) 2691 { 2692 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2693 return -EOPNOTSUPP; 2694 2695 return fscrypt_ioctl_add_key(filp, (void __user *)arg); 2696 } 2697 2698 static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg) 2699 { 2700 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2701 return -EOPNOTSUPP; 2702 2703 return fscrypt_ioctl_remove_key(filp, (void __user *)arg); 2704 } 2705 2706 static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp, 2707 unsigned long arg) 2708 { 2709 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2710 return -EOPNOTSUPP; 2711 2712 return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg); 2713 } 2714 2715 static int f2fs_ioc_get_encryption_key_status(struct file *filp, 2716 unsigned long arg) 2717 { 2718 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2719 return -EOPNOTSUPP; 2720 2721 return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); 2722 } 2723 2724 static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg) 2725 { 2726 if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) 2727 return -EOPNOTSUPP; 2728 2729 return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); 2730 } 2731 2732 static int f2fs_ioc_gc(struct file *filp, unsigned long arg) 2733 { 2734 struct inode *inode = file_inode(filp); 2735 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2736 struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, 2737 .no_bg_gc = false, 2738 .should_migrate_blocks = false, 2739 .nr_free_secs = 0 }; 2740 __u32 sync; 2741 int ret; 2742 2743 if (!capable(CAP_SYS_ADMIN)) 2744 return -EPERM; 2745 2746 if (get_user(sync, (__u32 __user *)arg)) 2747 return -EFAULT; 2748 2749 if (f2fs_readonly(sbi->sb)) 2750 return -EROFS; 2751 2752 ret = mnt_want_write_file(filp); 2753 if (ret) 2754 return ret; 2755 2756 if (!sync) { 2757 if (!f2fs_down_write_trylock(&sbi->gc_lock)) { 2758 ret = -EBUSY; 2759 goto out; 2760 } 2761 } else { 2762 f2fs_down_write(&sbi->gc_lock); 2763 } 2764 2765 gc_control.init_gc_type = sync ? FG_GC : BG_GC; 2766 gc_control.err_gc_skipped = sync; 2767 stat_inc_gc_call_count(sbi, FOREGROUND); 2768 ret = f2fs_gc(sbi, &gc_control); 2769 out: 2770 mnt_drop_write_file(filp); 2771 return ret; 2772 } 2773 2774 static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range) 2775 { 2776 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); 2777 struct f2fs_gc_control gc_control = { 2778 .init_gc_type = range->sync ? 
FG_GC : BG_GC, 2779 .no_bg_gc = false, 2780 .should_migrate_blocks = false, 2781 .err_gc_skipped = range->sync, 2782 .nr_free_secs = 0 }; 2783 u64 end; 2784 int ret; 2785 2786 if (!capable(CAP_SYS_ADMIN)) 2787 return -EPERM; 2788 if (f2fs_readonly(sbi->sb)) 2789 return -EROFS; 2790 2791 end = range->start + range->len; 2792 if (end < range->start || range->start < MAIN_BLKADDR(sbi) || 2793 end >= MAX_BLKADDR(sbi)) 2794 return -EINVAL; 2795 2796 ret = mnt_want_write_file(filp); 2797 if (ret) 2798 return ret; 2799 2800 do_more: 2801 if (!range->sync) { 2802 if (!f2fs_down_write_trylock(&sbi->gc_lock)) { 2803 ret = -EBUSY; 2804 goto out; 2805 } 2806 } else { 2807 f2fs_down_write(&sbi->gc_lock); 2808 } 2809 2810 gc_control.victim_segno = GET_SEGNO(sbi, range->start); 2811 stat_inc_gc_call_count(sbi, FOREGROUND); 2812 ret = f2fs_gc(sbi, &gc_control); 2813 if (ret) { 2814 if (ret == -EBUSY) 2815 ret = -EAGAIN; 2816 goto out; 2817 } 2818 range->start += CAP_BLKS_PER_SEC(sbi); 2819 if (range->start <= end) 2820 goto do_more; 2821 out: 2822 mnt_drop_write_file(filp); 2823 return ret; 2824 } 2825 2826 static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) 2827 { 2828 struct f2fs_gc_range range; 2829 2830 if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, 2831 sizeof(range))) 2832 return -EFAULT; 2833 return __f2fs_ioc_gc_range(filp, &range); 2834 } 2835 2836 static int f2fs_ioc_write_checkpoint(struct file *filp) 2837 { 2838 struct inode *inode = file_inode(filp); 2839 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2840 int ret; 2841 2842 if (!capable(CAP_SYS_ADMIN)) 2843 return -EPERM; 2844 2845 if (f2fs_readonly(sbi->sb)) 2846 return -EROFS; 2847 2848 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 2849 f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled."); 2850 return -EINVAL; 2851 } 2852 2853 ret = mnt_want_write_file(filp); 2854 if (ret) 2855 return ret; 2856 2857 ret = f2fs_sync_fs(sbi->sb, 1); 2858 2859 mnt_drop_write_file(filp); 2860 return ret; 2861 } 2862 2863 static int f2fs_defragment_range(struct f2fs_sb_info *sbi, 2864 struct file *filp, 2865 struct f2fs_defragment *range) 2866 { 2867 struct inode *inode = file_inode(filp); 2868 struct f2fs_map_blocks map = { .m_next_extent = NULL, 2869 .m_seg_type = NO_CHECK_TYPE, 2870 .m_may_create = false }; 2871 struct extent_info ei = {}; 2872 pgoff_t pg_start, pg_end, next_pgofs; 2873 unsigned int total = 0, sec_num; 2874 block_t blk_end = 0; 2875 bool fragmented = false; 2876 int err; 2877 2878 f2fs_balance_fs(sbi, true); 2879 2880 inode_lock(inode); 2881 pg_start = range->start >> PAGE_SHIFT; 2882 pg_end = min_t(pgoff_t, 2883 (range->start + range->len) >> PAGE_SHIFT, 2884 DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE)); 2885 2886 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) || 2887 f2fs_is_atomic_file(inode)) { 2888 err = -EINVAL; 2889 goto unlock_out; 2890 } 2891 2892 /* if in-place-update policy is enabled, don't waste time here */ 2893 set_inode_flag(inode, FI_OPU_WRITE); 2894 if (f2fs_should_update_inplace(inode, NULL)) { 2895 err = -EINVAL; 2896 goto out; 2897 } 2898 2899 /* writeback all dirty pages in the range */ 2900 err = filemap_write_and_wait_range(inode->i_mapping, 2901 pg_start << PAGE_SHIFT, 2902 (pg_end << PAGE_SHIFT) - 1); 2903 if (err) 2904 goto out; 2905 2906 /* 2907 * lookup mapping info in extent cache, skip defragmenting if physical 2908 * block addresses are continuous. 
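* i.e. if the cached extent containing pg_start already extends to pg_end, the whole range is contiguous and there is nothing to defragment.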
2909 */ 2910 if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) { 2911 if ((pgoff_t)ei.fofs + ei.len >= pg_end) 2912 goto out; 2913 } 2914 2915 map.m_lblk = pg_start; 2916 map.m_next_pgofs = &next_pgofs; 2917 2918 /* 2919 * lookup mapping info in dnode page cache, skip defragmenting if all 2920 * physical block addresses are continuous even if there are hole(s) 2921 * in logical blocks. 2922 */ 2923 while (map.m_lblk < pg_end) { 2924 map.m_len = pg_end - map.m_lblk; 2925 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT); 2926 if (err) 2927 goto out; 2928 2929 if (!(map.m_flags & F2FS_MAP_FLAGS)) { 2930 map.m_lblk = next_pgofs; 2931 continue; 2932 } 2933 2934 if (blk_end && blk_end != map.m_pblk) 2935 fragmented = true; 2936 2937 /* record total count of block that we're going to move */ 2938 total += map.m_len; 2939 2940 blk_end = map.m_pblk + map.m_len; 2941 2942 map.m_lblk += map.m_len; 2943 } 2944 2945 if (!fragmented) { 2946 total = 0; 2947 goto out; 2948 } 2949 2950 sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi)); 2951 2952 /* 2953 * make sure there are enough free section for LFS allocation, this can 2954 * avoid defragment running in SSR mode when free section are allocated 2955 * intensively 2956 */ 2957 if (has_not_enough_free_secs(sbi, 0, sec_num)) { 2958 err = -EAGAIN; 2959 goto out; 2960 } 2961 2962 map.m_lblk = pg_start; 2963 map.m_len = pg_end - pg_start; 2964 total = 0; 2965 2966 while (map.m_lblk < pg_end) { 2967 pgoff_t idx; 2968 int cnt = 0; 2969 2970 do_map: 2971 map.m_len = pg_end - map.m_lblk; 2972 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT); 2973 if (err) 2974 goto clear_out; 2975 2976 if (!(map.m_flags & F2FS_MAP_FLAGS)) { 2977 map.m_lblk = next_pgofs; 2978 goto check; 2979 } 2980 2981 set_inode_flag(inode, FI_SKIP_WRITES); 2982 2983 idx = map.m_lblk; 2984 while (idx < map.m_lblk + map.m_len && 2985 cnt < BLKS_PER_SEG(sbi)) { 2986 struct folio *folio; 2987 2988 folio = f2fs_get_lock_data_folio(inode, idx, true); 2989 if (IS_ERR(folio)) { 2990 err = PTR_ERR(folio); 2991 goto clear_out; 2992 } 2993 2994 f2fs_folio_wait_writeback(folio, DATA, true, true); 2995 2996 folio_mark_dirty(folio); 2997 folio_set_f2fs_gcing(folio); 2998 f2fs_folio_put(folio, true); 2999 3000 idx++; 3001 cnt++; 3002 total++; 3003 } 3004 3005 map.m_lblk = idx; 3006 check: 3007 if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi)) 3008 goto do_map; 3009 3010 clear_inode_flag(inode, FI_SKIP_WRITES); 3011 3012 err = filemap_fdatawrite(inode->i_mapping); 3013 if (err) 3014 goto out; 3015 } 3016 clear_out: 3017 clear_inode_flag(inode, FI_SKIP_WRITES); 3018 out: 3019 clear_inode_flag(inode, FI_OPU_WRITE); 3020 unlock_out: 3021 inode_unlock(inode); 3022 if (!err) 3023 range->len = (u64)total << PAGE_SHIFT; 3024 return err; 3025 } 3026 3027 static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) 3028 { 3029 struct inode *inode = file_inode(filp); 3030 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3031 struct f2fs_defragment range; 3032 int err; 3033 3034 if (!capable(CAP_SYS_ADMIN)) 3035 return -EPERM; 3036 3037 if (!S_ISREG(inode->i_mode)) 3038 return -EINVAL; 3039 3040 if (f2fs_readonly(sbi->sb)) 3041 return -EROFS; 3042 3043 if (copy_from_user(&range, (struct f2fs_defragment __user *)arg, 3044 sizeof(range))) 3045 return -EFAULT; 3046 3047 /* verify alignment of offset & size */ 3048 if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1)) 3049 return -EINVAL; 3050 3051 if (unlikely((range.start + range.len) >> PAGE_SHIFT > 3052 
max_file_blocks(inode))) 3053 return -EINVAL; 3054 3055 err = mnt_want_write_file(filp); 3056 if (err) 3057 return err; 3058 3059 err = f2fs_defragment_range(sbi, filp, &range); 3060 mnt_drop_write_file(filp); 3061 3062 if (range.len) 3063 f2fs_update_time(sbi, REQ_TIME); 3064 if (err < 0) 3065 return err; 3066 3067 if (copy_to_user((struct f2fs_defragment __user *)arg, &range, 3068 sizeof(range))) 3069 return -EFAULT; 3070 3071 return 0; 3072 } 3073 3074 static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, 3075 struct file *file_out, loff_t pos_out, size_t len) 3076 { 3077 struct inode *src = file_inode(file_in); 3078 struct inode *dst = file_inode(file_out); 3079 struct f2fs_sb_info *sbi = F2FS_I_SB(src); 3080 size_t olen = len, dst_max_i_size = 0; 3081 size_t dst_osize; 3082 int ret; 3083 3084 if (file_in->f_path.mnt != file_out->f_path.mnt || 3085 src->i_sb != dst->i_sb) 3086 return -EXDEV; 3087 3088 if (unlikely(f2fs_readonly(src->i_sb))) 3089 return -EROFS; 3090 3091 if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode)) 3092 return -EINVAL; 3093 3094 if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst)) 3095 return -EOPNOTSUPP; 3096 3097 if (pos_out < 0 || pos_in < 0) 3098 return -EINVAL; 3099 3100 if (src == dst) { 3101 if (pos_in == pos_out) 3102 return 0; 3103 if (pos_out > pos_in && pos_out < pos_in + len) 3104 return -EINVAL; 3105 } 3106 3107 inode_lock(src); 3108 if (src != dst) { 3109 ret = -EBUSY; 3110 if (!inode_trylock(dst)) 3111 goto out; 3112 } 3113 3114 if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) || 3115 f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) { 3116 ret = -EOPNOTSUPP; 3117 goto out_unlock; 3118 } 3119 3120 if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) { 3121 ret = -EINVAL; 3122 goto out_unlock; 3123 } 3124 3125 ret = -EINVAL; 3126 if (pos_in + len > src->i_size || pos_in + len < pos_in) 3127 goto out_unlock; 3128 if (len == 0) 3129 olen = len = src->i_size - pos_in; 3130 if (pos_in + len == src->i_size) 3131 len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in; 3132 if (len == 0) { 3133 ret = 0; 3134 goto out_unlock; 3135 } 3136 3137 dst_osize = dst->i_size; 3138 if (pos_out + olen > dst->i_size) 3139 dst_max_i_size = pos_out + olen; 3140 3141 /* verify the end result is block aligned */ 3142 if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) || 3143 !IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) || 3144 !IS_ALIGNED(pos_out, F2FS_BLKSIZE)) 3145 goto out_unlock; 3146 3147 ret = f2fs_convert_inline_inode(src); 3148 if (ret) 3149 goto out_unlock; 3150 3151 ret = f2fs_convert_inline_inode(dst); 3152 if (ret) 3153 goto out_unlock; 3154 3155 /* write out all dirty pages from offset */ 3156 ret = filemap_write_and_wait_range(src->i_mapping, 3157 pos_in, pos_in + len); 3158 if (ret) 3159 goto out_unlock; 3160 3161 ret = filemap_write_and_wait_range(dst->i_mapping, 3162 pos_out, pos_out + len); 3163 if (ret) 3164 goto out_unlock; 3165 3166 f2fs_balance_fs(sbi, true); 3167 3168 f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); 3169 if (src != dst) { 3170 ret = -EBUSY; 3171 if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) 3172 goto out_src; 3173 } 3174 3175 f2fs_lock_op(sbi); 3176 ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in), 3177 F2FS_BYTES_TO_BLK(pos_out), 3178 F2FS_BYTES_TO_BLK(len), false); 3179 3180 if (!ret) { 3181 if (dst_max_i_size) 3182 f2fs_i_size_write(dst, dst_max_i_size); 3183 else if (dst_osize != dst->i_size) 3184 f2fs_i_size_write(dst, dst_osize); 3185 } 3186 f2fs_unlock_op(sbi); 3187 3188 if (src != dst) 3189 
f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); 3190 out_src: 3191 f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); 3192 if (ret) 3193 goto out_unlock; 3194 3195 inode_set_mtime_to_ts(src, inode_set_ctime_current(src)); 3196 f2fs_mark_inode_dirty_sync(src, false); 3197 if (src != dst) { 3198 inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst)); 3199 f2fs_mark_inode_dirty_sync(dst, false); 3200 } 3201 f2fs_update_time(sbi, REQ_TIME); 3202 3203 out_unlock: 3204 if (src != dst) 3205 inode_unlock(dst); 3206 out: 3207 inode_unlock(src); 3208 return ret; 3209 } 3210 3211 static int __f2fs_ioc_move_range(struct file *filp, 3212 struct f2fs_move_range *range) 3213 { 3214 int err; 3215 3216 if (!(filp->f_mode & FMODE_READ) || 3217 !(filp->f_mode & FMODE_WRITE)) 3218 return -EBADF; 3219 3220 CLASS(fd, dst)(range->dst_fd); 3221 if (fd_empty(dst)) 3222 return -EBADF; 3223 3224 if (!(fd_file(dst)->f_mode & FMODE_WRITE)) 3225 return -EBADF; 3226 3227 err = mnt_want_write_file(filp); 3228 if (err) 3229 return err; 3230 3231 err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst), 3232 range->pos_out, range->len); 3233 3234 mnt_drop_write_file(filp); 3235 return err; 3236 } 3237 3238 static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) 3239 { 3240 struct f2fs_move_range range; 3241 3242 if (copy_from_user(&range, (struct f2fs_move_range __user *)arg, 3243 sizeof(range))) 3244 return -EFAULT; 3245 return __f2fs_ioc_move_range(filp, &range); 3246 } 3247 3248 static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) 3249 { 3250 struct inode *inode = file_inode(filp); 3251 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3252 struct sit_info *sm = SIT_I(sbi); 3253 unsigned int start_segno = 0, end_segno = 0; 3254 unsigned int dev_start_segno = 0, dev_end_segno = 0; 3255 struct f2fs_flush_device range; 3256 struct f2fs_gc_control gc_control = { 3257 .init_gc_type = FG_GC, 3258 .should_migrate_blocks = true, 3259 .err_gc_skipped = true, 3260 .nr_free_secs = 0 }; 3261 int ret; 3262 3263 if (!capable(CAP_SYS_ADMIN)) 3264 return -EPERM; 3265 3266 if (f2fs_readonly(sbi->sb)) 3267 return -EROFS; 3268 3269 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) 3270 return -EINVAL; 3271 3272 if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, 3273 sizeof(range))) 3274 return -EFAULT; 3275 3276 if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || 3277 __is_large_section(sbi)) { 3278 f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1", 3279 range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi)); 3280 return -EINVAL; 3281 } 3282 3283 ret = mnt_want_write_file(filp); 3284 if (ret) 3285 return ret; 3286 3287 if (range.dev_num != 0) 3288 dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk); 3289 dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk); 3290 3291 start_segno = sm->last_victim[FLUSH_DEVICE]; 3292 if (start_segno < dev_start_segno || start_segno >= dev_end_segno) 3293 start_segno = dev_start_segno; 3294 end_segno = min(start_segno + range.segments, dev_end_segno); 3295 3296 while (start_segno < end_segno) { 3297 if (!f2fs_down_write_trylock(&sbi->gc_lock)) { 3298 ret = -EBUSY; 3299 goto out; 3300 } 3301 sm->last_victim[GC_CB] = end_segno + 1; 3302 sm->last_victim[GC_GREEDY] = end_segno + 1; 3303 sm->last_victim[ALLOC_NEXT] = end_segno + 1; 3304 3305 gc_control.victim_segno = start_segno; 3306 stat_inc_gc_call_count(sbi, FOREGROUND); 3307 ret = f2fs_gc(sbi, &gc_control); 3308 if (ret == -EAGAIN) 3309 ret = 0; 3310 else if (ret < 
0) 3311 break; 3312 start_segno++; 3313 } 3314 out: 3315 mnt_drop_write_file(filp); 3316 return ret; 3317 } 3318 3319 static int f2fs_ioc_get_features(struct file *filp, unsigned long arg) 3320 { 3321 struct inode *inode = file_inode(filp); 3322 u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature); 3323 3324 /* Must validate to set it with SQLite behavior in Android. */ 3325 sb_feature |= F2FS_FEATURE_ATOMIC_WRITE; 3326 3327 return put_user(sb_feature, (u32 __user *)arg); 3328 } 3329 3330 #ifdef CONFIG_QUOTA 3331 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) 3332 { 3333 struct dquot *transfer_to[MAXQUOTAS] = {}; 3334 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3335 struct super_block *sb = sbi->sb; 3336 int err; 3337 3338 transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); 3339 if (IS_ERR(transfer_to[PRJQUOTA])) 3340 return PTR_ERR(transfer_to[PRJQUOTA]); 3341 3342 err = __dquot_transfer(inode, transfer_to); 3343 if (err) 3344 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); 3345 dqput(transfer_to[PRJQUOTA]); 3346 return err; 3347 } 3348 3349 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) 3350 { 3351 struct f2fs_inode_info *fi = F2FS_I(inode); 3352 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3353 struct f2fs_inode *ri = NULL; 3354 kprojid_t kprojid; 3355 int err; 3356 3357 if (!f2fs_sb_has_project_quota(sbi)) { 3358 if (projid != F2FS_DEF_PROJID) 3359 return -EOPNOTSUPP; 3360 else 3361 return 0; 3362 } 3363 3364 if (!f2fs_has_extra_attr(inode)) 3365 return -EOPNOTSUPP; 3366 3367 kprojid = make_kprojid(&init_user_ns, (projid_t)projid); 3368 3369 if (projid_eq(kprojid, fi->i_projid)) 3370 return 0; 3371 3372 err = -EPERM; 3373 /* Is it quota file? Do not allow user to mess with it */ 3374 if (IS_NOQUOTA(inode)) 3375 return err; 3376 3377 if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) 3378 return -EOVERFLOW; 3379 3380 err = f2fs_dquot_initialize(inode); 3381 if (err) 3382 return err; 3383 3384 f2fs_lock_op(sbi); 3385 err = f2fs_transfer_project_quota(inode, kprojid); 3386 if (err) 3387 goto out_unlock; 3388 3389 fi->i_projid = kprojid; 3390 inode_set_ctime_current(inode); 3391 f2fs_mark_inode_dirty_sync(inode, true); 3392 out_unlock: 3393 f2fs_unlock_op(sbi); 3394 return err; 3395 } 3396 #else 3397 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) 3398 { 3399 return 0; 3400 } 3401 3402 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) 3403 { 3404 if (projid != F2FS_DEF_PROJID) 3405 return -EOPNOTSUPP; 3406 return 0; 3407 } 3408 #endif 3409 3410 int f2fs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) 3411 { 3412 struct inode *inode = d_inode(dentry); 3413 struct f2fs_inode_info *fi = F2FS_I(inode); 3414 u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags); 3415 3416 if (IS_ENCRYPTED(inode)) 3417 fsflags |= FS_ENCRYPT_FL; 3418 if (IS_VERITY(inode)) 3419 fsflags |= FS_VERITY_FL; 3420 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) 3421 fsflags |= FS_INLINE_DATA_FL; 3422 if (is_inode_flag_set(inode, FI_PIN_FILE)) 3423 fsflags |= FS_NOCOW_FL; 3424 3425 fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL); 3426 3427 if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) 3428 fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid); 3429 3430 return 0; 3431 } 3432 3433 int f2fs_fileattr_set(struct mnt_idmap *idmap, 3434 struct dentry *dentry, struct file_kattr *fa) 3435 { 3436 struct inode *inode = d_inode(dentry); 3437 u32 fsflags = fa->flags, mask = 
F2FS_SETTABLE_FS_FL; 3438 u32 iflags; 3439 int err; 3440 3441 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 3442 return -EIO; 3443 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) 3444 return -ENOSPC; 3445 if (fsflags & ~F2FS_GETTABLE_FS_FL) 3446 return -EOPNOTSUPP; 3447 fsflags &= F2FS_SETTABLE_FS_FL; 3448 if (!fa->flags_valid) 3449 mask &= FS_COMMON_FL; 3450 3451 iflags = f2fs_fsflags_to_iflags(fsflags); 3452 if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) 3453 return -EOPNOTSUPP; 3454 3455 err = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(mask)); 3456 if (!err) 3457 err = f2fs_ioc_setproject(inode, fa->fsx_projid); 3458 3459 return err; 3460 } 3461 3462 int f2fs_pin_file_control(struct inode *inode, bool inc) 3463 { 3464 struct f2fs_inode_info *fi = F2FS_I(inode); 3465 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3466 3467 if (IS_DEVICE_ALIASING(inode)) 3468 return -EINVAL; 3469 3470 if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { 3471 f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", 3472 __func__, inode->i_ino, fi->i_gc_failures); 3473 clear_inode_flag(inode, FI_PIN_FILE); 3474 return -EAGAIN; 3475 } 3476 3477 /* Use i_gc_failures for normal file as a risk signal. */ 3478 if (inc) 3479 f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); 3480 3481 return 0; 3482 } 3483 3484 static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) 3485 { 3486 struct inode *inode = file_inode(filp); 3487 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3488 __u32 pin; 3489 int ret = 0; 3490 3491 if (get_user(pin, (__u32 __user *)arg)) 3492 return -EFAULT; 3493 3494 if (!S_ISREG(inode->i_mode)) 3495 return -EINVAL; 3496 3497 if (f2fs_readonly(sbi->sb)) 3498 return -EROFS; 3499 3500 if (!pin && IS_DEVICE_ALIASING(inode)) 3501 return -EOPNOTSUPP; 3502 3503 ret = mnt_want_write_file(filp); 3504 if (ret) 3505 return ret; 3506 3507 inode_lock(inode); 3508 3509 if (f2fs_is_atomic_file(inode)) { 3510 ret = -EINVAL; 3511 goto out; 3512 } 3513 3514 if (!pin) { 3515 clear_inode_flag(inode, FI_PIN_FILE); 3516 f2fs_i_gc_failures_write(inode, 0); 3517 goto done; 3518 } else if (f2fs_is_pinned_file(inode)) { 3519 goto done; 3520 } 3521 3522 if (F2FS_HAS_BLOCKS(inode)) { 3523 ret = -EFBIG; 3524 goto out; 3525 } 3526 3527 /* Let's allow file pinning on zoned device. */ 3528 if (!f2fs_sb_has_blkzoned(sbi) && 3529 f2fs_should_update_outplace(inode, NULL)) { 3530 ret = -EINVAL; 3531 goto out; 3532 } 3533 3534 if (f2fs_pin_file_control(inode, false)) { 3535 ret = -EAGAIN; 3536 goto out; 3537 } 3538 3539 ret = f2fs_convert_inline_inode(inode); 3540 if (ret) 3541 goto out; 3542 3543 if (!f2fs_disable_compressed_file(inode)) { 3544 ret = -EOPNOTSUPP; 3545 goto out; 3546 } 3547 3548 set_inode_flag(inode, FI_PIN_FILE); 3549 ret = F2FS_I(inode)->i_gc_failures; 3550 done: 3551 f2fs_update_time(sbi, REQ_TIME); 3552 out: 3553 inode_unlock(inode); 3554 mnt_drop_write_file(filp); 3555 return ret; 3556 } 3557 3558 static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) 3559 { 3560 struct inode *inode = file_inode(filp); 3561 __u32 pin = 0; 3562 3563 if (is_inode_flag_set(inode, FI_PIN_FILE)) 3564 pin = F2FS_I(inode)->i_gc_failures; 3565 return put_user(pin, (u32 __user *)arg); 3566 } 3567 3568 static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg) 3569 { 3570 return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 
1 : 0, 3571 (u32 __user *)arg); 3572 } 3573 3574 static int f2fs_ioc_io_prio(struct file *filp, unsigned long arg) 3575 { 3576 struct inode *inode = file_inode(filp); 3577 __u32 level; 3578 3579 if (get_user(level, (__u32 __user *)arg)) 3580 return -EFAULT; 3581 3582 if (!S_ISREG(inode->i_mode) || level >= F2FS_IOPRIO_MAX) 3583 return -EINVAL; 3584 3585 inode_lock(inode); 3586 F2FS_I(inode)->ioprio_hint = level; 3587 inode_unlock(inode); 3588 return 0; 3589 } 3590 3591 int f2fs_precache_extents(struct inode *inode) 3592 { 3593 struct f2fs_inode_info *fi = F2FS_I(inode); 3594 struct f2fs_map_blocks map; 3595 pgoff_t m_next_extent; 3596 loff_t end; 3597 int err; 3598 3599 if (is_inode_flag_set(inode, FI_NO_EXTENT)) 3600 return -EOPNOTSUPP; 3601 3602 map.m_lblk = 0; 3603 map.m_pblk = 0; 3604 map.m_next_pgofs = NULL; 3605 map.m_next_extent = &m_next_extent; 3606 map.m_seg_type = NO_CHECK_TYPE; 3607 map.m_may_create = false; 3608 end = F2FS_BLK_ALIGN(i_size_read(inode)); 3609 3610 while (map.m_lblk < end) { 3611 map.m_len = end - map.m_lblk; 3612 3613 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 3614 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE); 3615 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 3616 if (err || !map.m_len) 3617 return err; 3618 3619 map.m_lblk = m_next_extent; 3620 } 3621 3622 return 0; 3623 } 3624 3625 static int f2fs_ioc_precache_extents(struct file *filp) 3626 { 3627 return f2fs_precache_extents(file_inode(filp)); 3628 } 3629 3630 static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) 3631 { 3632 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); 3633 __u64 block_count; 3634 3635 if (!capable(CAP_SYS_ADMIN)) 3636 return -EPERM; 3637 3638 if (f2fs_readonly(sbi->sb)) 3639 return -EROFS; 3640 3641 if (copy_from_user(&block_count, (void __user *)arg, 3642 sizeof(block_count))) 3643 return -EFAULT; 3644 3645 return f2fs_resize_fs(filp, block_count); 3646 } 3647 3648 static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg) 3649 { 3650 struct inode *inode = file_inode(filp); 3651 3652 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 3653 3654 if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) { 3655 f2fs_warn(F2FS_I_SB(inode), 3656 "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem", 3657 inode->i_ino); 3658 return -EOPNOTSUPP; 3659 } 3660 3661 return fsverity_ioctl_enable(filp, (const void __user *)arg); 3662 } 3663 3664 static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg) 3665 { 3666 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) 3667 return -EOPNOTSUPP; 3668 3669 return fsverity_ioctl_measure(filp, (void __user *)arg); 3670 } 3671 3672 static int f2fs_ioc_read_verity_metadata(struct file *filp, unsigned long arg) 3673 { 3674 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) 3675 return -EOPNOTSUPP; 3676 3677 return fsverity_ioctl_read_metadata(filp, (const void __user *)arg); 3678 } 3679 3680 static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg) 3681 { 3682 struct inode *inode = file_inode(filp); 3683 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3684 char *vbuf; 3685 int count; 3686 int err = 0; 3687 3688 vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL); 3689 if (!vbuf) 3690 return -ENOMEM; 3691 3692 f2fs_down_read(&sbi->sb_lock); 3693 count = utf16s_to_utf8s(sbi->raw_super->volume_name, 3694 ARRAY_SIZE(sbi->raw_super->volume_name), 3695 UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME); 3696 f2fs_up_read(&sbi->sb_lock); 3697 3698 if (copy_to_user((char __user *)arg, 
vbuf, 3699 min(FSLABEL_MAX, count))) 3700 err = -EFAULT; 3701 3702 kfree(vbuf); 3703 return err; 3704 } 3705 3706 static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) 3707 { 3708 struct inode *inode = file_inode(filp); 3709 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3710 char *vbuf; 3711 int err = 0; 3712 3713 if (!capable(CAP_SYS_ADMIN)) 3714 return -EPERM; 3715 3716 vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX); 3717 if (IS_ERR(vbuf)) 3718 return PTR_ERR(vbuf); 3719 3720 err = mnt_want_write_file(filp); 3721 if (err) 3722 goto out; 3723 3724 f2fs_down_write(&sbi->sb_lock); 3725 3726 memset(sbi->raw_super->volume_name, 0, 3727 sizeof(sbi->raw_super->volume_name)); 3728 utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN, 3729 sbi->raw_super->volume_name, 3730 ARRAY_SIZE(sbi->raw_super->volume_name)); 3731 3732 err = f2fs_commit_super(sbi, false); 3733 3734 f2fs_up_write(&sbi->sb_lock); 3735 3736 mnt_drop_write_file(filp); 3737 out: 3738 kfree(vbuf); 3739 return err; 3740 } 3741 3742 static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks) 3743 { 3744 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 3745 return -EOPNOTSUPP; 3746 3747 if (!f2fs_compressed_file(inode)) 3748 return -EINVAL; 3749 3750 *blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks); 3751 3752 return 0; 3753 } 3754 3755 static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg) 3756 { 3757 struct inode *inode = file_inode(filp); 3758 __u64 blocks; 3759 int ret; 3760 3761 ret = f2fs_get_compress_blocks(inode, &blocks); 3762 if (ret < 0) 3763 return ret; 3764 3765 return put_user(blocks, (u64 __user *)arg); 3766 } 3767 3768 static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) 3769 { 3770 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 3771 unsigned int released_blocks = 0; 3772 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 3773 block_t blkaddr; 3774 int i; 3775 3776 for (i = 0; i < count; i++) { 3777 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3778 dn->ofs_in_node + i); 3779 3780 if (!__is_valid_data_blkaddr(blkaddr)) 3781 continue; 3782 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, 3783 DATA_GENERIC_ENHANCE))) 3784 return -EFSCORRUPTED; 3785 } 3786 3787 while (count) { 3788 int compr_blocks = 0; 3789 3790 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { 3791 blkaddr = f2fs_data_blkaddr(dn); 3792 3793 if (i == 0) { 3794 if (blkaddr == COMPRESS_ADDR) 3795 continue; 3796 dn->ofs_in_node += cluster_size; 3797 goto next; 3798 } 3799 3800 if (__is_valid_data_blkaddr(blkaddr)) 3801 compr_blocks++; 3802 3803 if (blkaddr != NEW_ADDR) 3804 continue; 3805 3806 f2fs_set_data_blkaddr(dn, NULL_ADDR); 3807 } 3808 3809 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false); 3810 dec_valid_block_count(sbi, dn->inode, 3811 cluster_size - compr_blocks); 3812 3813 released_blocks += cluster_size - compr_blocks; 3814 next: 3815 count -= cluster_size; 3816 } 3817 3818 return released_blocks; 3819 } 3820 3821 static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) 3822 { 3823 struct inode *inode = file_inode(filp); 3824 struct f2fs_inode_info *fi = F2FS_I(inode); 3825 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3826 pgoff_t page_idx = 0, last_idx; 3827 unsigned int released_blocks = 0; 3828 int ret; 3829 int writecount; 3830 3831 if (!f2fs_sb_has_compression(sbi)) 3832 return -EOPNOTSUPP; 3833 3834 if (f2fs_readonly(sbi->sb)) 3835 return -EROFS; 3836 3837 ret = mnt_want_write_file(filp); 3838 if (ret) 
3839 return ret; 3840 3841 f2fs_balance_fs(sbi, true); 3842 3843 inode_lock(inode); 3844 3845 writecount = atomic_read(&inode->i_writecount); 3846 if ((filp->f_mode & FMODE_WRITE && writecount != 1) || 3847 (!(filp->f_mode & FMODE_WRITE) && writecount)) { 3848 ret = -EBUSY; 3849 goto out; 3850 } 3851 3852 if (!f2fs_compressed_file(inode) || 3853 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 3854 ret = -EINVAL; 3855 goto out; 3856 } 3857 3858 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 3859 if (ret) 3860 goto out; 3861 3862 if (!atomic_read(&fi->i_compr_blocks)) { 3863 ret = -EPERM; 3864 goto out; 3865 } 3866 3867 set_inode_flag(inode, FI_COMPRESS_RELEASED); 3868 inode_set_ctime_current(inode); 3869 f2fs_mark_inode_dirty_sync(inode, true); 3870 3871 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 3872 filemap_invalidate_lock(inode->i_mapping); 3873 3874 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 3875 3876 while (page_idx < last_idx) { 3877 struct dnode_of_data dn; 3878 pgoff_t end_offset, count; 3879 3880 f2fs_lock_op(sbi); 3881 3882 set_new_dnode(&dn, inode, NULL, NULL, 0); 3883 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); 3884 if (ret) { 3885 f2fs_unlock_op(sbi); 3886 if (ret == -ENOENT) { 3887 page_idx = f2fs_get_next_page_offset(&dn, 3888 page_idx); 3889 ret = 0; 3890 continue; 3891 } 3892 break; 3893 } 3894 3895 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 3896 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); 3897 count = round_up(count, fi->i_cluster_size); 3898 3899 ret = release_compress_blocks(&dn, count); 3900 3901 f2fs_put_dnode(&dn); 3902 3903 f2fs_unlock_op(sbi); 3904 3905 if (ret < 0) 3906 break; 3907 3908 page_idx += count; 3909 released_blocks += ret; 3910 } 3911 3912 filemap_invalidate_unlock(inode->i_mapping); 3913 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 3914 out: 3915 if (released_blocks) 3916 f2fs_update_time(sbi, REQ_TIME); 3917 inode_unlock(inode); 3918 3919 mnt_drop_write_file(filp); 3920 3921 if (ret >= 0) { 3922 ret = put_user(released_blocks, (u64 __user *)arg); 3923 } else if (released_blocks && 3924 atomic_read(&fi->i_compr_blocks)) { 3925 set_sbi_flag(sbi, SBI_NEED_FSCK); 3926 f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " 3927 "iblocks=%llu, released=%u, compr_blocks=%u, " 3928 "run fsck to fix.", 3929 __func__, inode->i_ino, inode->i_blocks, 3930 released_blocks, 3931 atomic_read(&fi->i_compr_blocks)); 3932 } 3933 3934 return ret; 3935 } 3936 3937 static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, 3938 unsigned int *reserved_blocks) 3939 { 3940 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 3941 int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 3942 block_t blkaddr; 3943 int i; 3944 3945 for (i = 0; i < count; i++) { 3946 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3947 dn->ofs_in_node + i); 3948 3949 if (!__is_valid_data_blkaddr(blkaddr)) 3950 continue; 3951 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, 3952 DATA_GENERIC_ENHANCE))) 3953 return -EFSCORRUPTED; 3954 } 3955 3956 while (count) { 3957 int compr_blocks = 0; 3958 blkcnt_t reserved = 0; 3959 blkcnt_t to_reserved; 3960 int ret; 3961 3962 for (i = 0; i < cluster_size; i++) { 3963 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 3964 dn->ofs_in_node + i); 3965 3966 if (i == 0) { 3967 if (blkaddr != COMPRESS_ADDR) { 3968 dn->ofs_in_node += cluster_size; 3969 goto next; 3970 } 3971 continue; 3972 } 3973 3974 /* 3975 * compressed cluster was not released due to it 3976 * fails in 
release_compress_blocks(), so NEW_ADDR 3977 * is a possible case. 3978 */ 3979 if (blkaddr == NEW_ADDR) { 3980 reserved++; 3981 continue; 3982 } 3983 if (__is_valid_data_blkaddr(blkaddr)) { 3984 compr_blocks++; 3985 continue; 3986 } 3987 } 3988 3989 to_reserved = cluster_size - compr_blocks - reserved; 3990 3991 /* for the case all blocks in cluster were reserved */ 3992 if (reserved && to_reserved == 1) { 3993 dn->ofs_in_node += cluster_size; 3994 goto next; 3995 } 3996 3997 ret = inc_valid_block_count(sbi, dn->inode, 3998 &to_reserved, false); 3999 if (unlikely(ret)) 4000 return ret; 4001 4002 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { 4003 if (f2fs_data_blkaddr(dn) == NULL_ADDR) 4004 f2fs_set_data_blkaddr(dn, NEW_ADDR); 4005 } 4006 4007 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); 4008 4009 *reserved_blocks += to_reserved; 4010 next: 4011 count -= cluster_size; 4012 } 4013 4014 return 0; 4015 } 4016 4017 static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) 4018 { 4019 struct inode *inode = file_inode(filp); 4020 struct f2fs_inode_info *fi = F2FS_I(inode); 4021 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4022 pgoff_t page_idx = 0, last_idx; 4023 unsigned int reserved_blocks = 0; 4024 int ret; 4025 4026 if (!f2fs_sb_has_compression(sbi)) 4027 return -EOPNOTSUPP; 4028 4029 if (f2fs_readonly(sbi->sb)) 4030 return -EROFS; 4031 4032 ret = mnt_want_write_file(filp); 4033 if (ret) 4034 return ret; 4035 4036 f2fs_balance_fs(sbi, true); 4037 4038 inode_lock(inode); 4039 4040 if (!f2fs_compressed_file(inode) || 4041 !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4042 ret = -EINVAL; 4043 goto unlock_inode; 4044 } 4045 4046 if (atomic_read(&fi->i_compr_blocks)) 4047 goto unlock_inode; 4048 4049 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 4050 filemap_invalidate_lock(inode->i_mapping); 4051 4052 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4053 4054 while (page_idx < last_idx) { 4055 struct dnode_of_data dn; 4056 pgoff_t end_offset, count; 4057 4058 f2fs_lock_op(sbi); 4059 4060 set_new_dnode(&dn, inode, NULL, NULL, 0); 4061 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); 4062 if (ret) { 4063 f2fs_unlock_op(sbi); 4064 if (ret == -ENOENT) { 4065 page_idx = f2fs_get_next_page_offset(&dn, 4066 page_idx); 4067 ret = 0; 4068 continue; 4069 } 4070 break; 4071 } 4072 4073 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 4074 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); 4075 count = round_up(count, fi->i_cluster_size); 4076 4077 ret = reserve_compress_blocks(&dn, count, &reserved_blocks); 4078 4079 f2fs_put_dnode(&dn); 4080 4081 f2fs_unlock_op(sbi); 4082 4083 if (ret < 0) 4084 break; 4085 4086 page_idx += count; 4087 } 4088 4089 filemap_invalidate_unlock(inode->i_mapping); 4090 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); 4091 4092 if (!ret) { 4093 clear_inode_flag(inode, FI_COMPRESS_RELEASED); 4094 inode_set_ctime_current(inode); 4095 f2fs_mark_inode_dirty_sync(inode, true); 4096 } 4097 unlock_inode: 4098 if (reserved_blocks) 4099 f2fs_update_time(sbi, REQ_TIME); 4100 inode_unlock(inode); 4101 mnt_drop_write_file(filp); 4102 4103 if (!ret) { 4104 ret = put_user(reserved_blocks, (u64 __user *)arg); 4105 } else if (reserved_blocks && 4106 atomic_read(&fi->i_compr_blocks)) { 4107 set_sbi_flag(sbi, SBI_NEED_FSCK); 4108 f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx " 4109 "iblocks=%llu, reserved=%u, compr_blocks=%u, " 4110 "run fsck to fix.", 4111 __func__, inode->i_ino, inode->i_blocks, 4112 reserved_blocks, 
4113 atomic_read(&fi->i_compr_blocks)); 4114 } 4115 4116 return ret; 4117 } 4118 4119 static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode, 4120 pgoff_t off, block_t block, block_t len, u32 flags) 4121 { 4122 sector_t sector = SECTOR_FROM_BLOCK(block); 4123 sector_t nr_sects = SECTOR_FROM_BLOCK(len); 4124 int ret = 0; 4125 4126 if (flags & F2FS_TRIM_FILE_DISCARD) { 4127 if (bdev_max_secure_erase_sectors(bdev)) 4128 ret = blkdev_issue_secure_erase(bdev, sector, nr_sects, 4129 GFP_NOFS); 4130 else 4131 ret = blkdev_issue_discard(bdev, sector, nr_sects, 4132 GFP_NOFS); 4133 } 4134 4135 if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) { 4136 if (IS_ENCRYPTED(inode)) 4137 ret = fscrypt_zeroout_range(inode, off, block, len); 4138 else 4139 ret = blkdev_issue_zeroout(bdev, sector, nr_sects, 4140 GFP_NOFS, 0); 4141 } 4142 4143 return ret; 4144 } 4145 4146 static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) 4147 { 4148 struct inode *inode = file_inode(filp); 4149 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4150 struct address_space *mapping = inode->i_mapping; 4151 struct block_device *prev_bdev = NULL; 4152 struct f2fs_sectrim_range range; 4153 pgoff_t index, pg_end, prev_index = 0; 4154 block_t prev_block = 0, len = 0; 4155 loff_t end_addr; 4156 bool to_end = false; 4157 int ret = 0; 4158 4159 if (!(filp->f_mode & FMODE_WRITE)) 4160 return -EBADF; 4161 4162 if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg, 4163 sizeof(range))) 4164 return -EFAULT; 4165 4166 if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) || 4167 !S_ISREG(inode->i_mode)) 4168 return -EINVAL; 4169 4170 if (((range.flags & F2FS_TRIM_FILE_DISCARD) && 4171 !f2fs_hw_support_discard(sbi)) || 4172 ((range.flags & F2FS_TRIM_FILE_ZEROOUT) && 4173 IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi))) 4174 return -EOPNOTSUPP; 4175 4176 ret = mnt_want_write_file(filp); 4177 if (ret) 4178 return ret; 4179 inode_lock(inode); 4180 4181 if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) || 4182 range.start >= inode->i_size) { 4183 ret = -EINVAL; 4184 goto err; 4185 } 4186 4187 if (range.len == 0) 4188 goto err; 4189 4190 if (inode->i_size - range.start > range.len) { 4191 end_addr = range.start + range.len; 4192 } else { 4193 end_addr = range.len == (u64)-1 ? 4194 sbi->sb->s_maxbytes : inode->i_size; 4195 to_end = true; 4196 } 4197 4198 if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) || 4199 (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) { 4200 ret = -EINVAL; 4201 goto err; 4202 } 4203 4204 index = F2FS_BYTES_TO_BLK(range.start); 4205 pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE); 4206 4207 ret = f2fs_convert_inline_inode(inode); 4208 if (ret) 4209 goto err; 4210 4211 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 4212 filemap_invalidate_lock(mapping); 4213 4214 ret = filemap_write_and_wait_range(mapping, range.start, 4215 to_end ? LLONG_MAX : end_addr - 1); 4216 if (ret) 4217 goto out; 4218 4219 truncate_inode_pages_range(mapping, range.start, 4220 to_end ? 
-1 : end_addr - 1); 4221 4222 while (index < pg_end) { 4223 struct dnode_of_data dn; 4224 pgoff_t end_offset, count; 4225 int i; 4226 4227 set_new_dnode(&dn, inode, NULL, NULL, 0); 4228 ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); 4229 if (ret) { 4230 if (ret == -ENOENT) { 4231 index = f2fs_get_next_page_offset(&dn, index); 4232 continue; 4233 } 4234 goto out; 4235 } 4236 4237 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); 4238 count = min(end_offset - dn.ofs_in_node, pg_end - index); 4239 for (i = 0; i < count; i++, index++, dn.ofs_in_node++) { 4240 struct block_device *cur_bdev; 4241 block_t blkaddr = f2fs_data_blkaddr(&dn); 4242 4243 if (!__is_valid_data_blkaddr(blkaddr)) 4244 continue; 4245 4246 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, 4247 DATA_GENERIC_ENHANCE)) { 4248 ret = -EFSCORRUPTED; 4249 f2fs_put_dnode(&dn); 4250 goto out; 4251 } 4252 4253 cur_bdev = f2fs_target_device(sbi, blkaddr, NULL); 4254 if (f2fs_is_multi_device(sbi)) { 4255 int di = f2fs_target_device_index(sbi, blkaddr); 4256 4257 blkaddr -= FDEV(di).start_blk; 4258 } 4259 4260 if (len) { 4261 if (prev_bdev == cur_bdev && 4262 index == prev_index + len && 4263 blkaddr == prev_block + len) { 4264 len++; 4265 } else { 4266 ret = f2fs_secure_erase(prev_bdev, 4267 inode, prev_index, prev_block, 4268 len, range.flags); 4269 if (ret) { 4270 f2fs_put_dnode(&dn); 4271 goto out; 4272 } 4273 4274 len = 0; 4275 } 4276 } 4277 4278 if (!len) { 4279 prev_bdev = cur_bdev; 4280 prev_index = index; 4281 prev_block = blkaddr; 4282 len = 1; 4283 } 4284 } 4285 4286 f2fs_put_dnode(&dn); 4287 4288 if (fatal_signal_pending(current)) { 4289 ret = -EINTR; 4290 goto out; 4291 } 4292 cond_resched(); 4293 } 4294 4295 if (len) 4296 ret = f2fs_secure_erase(prev_bdev, inode, prev_index, 4297 prev_block, len, range.flags); 4298 f2fs_update_time(sbi, REQ_TIME); 4299 out: 4300 filemap_invalidate_unlock(mapping); 4301 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 4302 err: 4303 inode_unlock(inode); 4304 mnt_drop_write_file(filp); 4305 4306 return ret; 4307 } 4308 4309 static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg) 4310 { 4311 struct inode *inode = file_inode(filp); 4312 struct f2fs_comp_option option; 4313 4314 if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) 4315 return -EOPNOTSUPP; 4316 4317 inode_lock_shared(inode); 4318 4319 if (!f2fs_compressed_file(inode)) { 4320 inode_unlock_shared(inode); 4321 return -ENODATA; 4322 } 4323 4324 option.algorithm = F2FS_I(inode)->i_compress_algorithm; 4325 option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size; 4326 4327 inode_unlock_shared(inode); 4328 4329 if (copy_to_user((struct f2fs_comp_option __user *)arg, &option, 4330 sizeof(option))) 4331 return -EFAULT; 4332 4333 return 0; 4334 } 4335 4336 static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) 4337 { 4338 struct inode *inode = file_inode(filp); 4339 struct f2fs_inode_info *fi = F2FS_I(inode); 4340 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4341 struct f2fs_comp_option option; 4342 int ret = 0; 4343 4344 if (!f2fs_sb_has_compression(sbi)) 4345 return -EOPNOTSUPP; 4346 4347 if (!(filp->f_mode & FMODE_WRITE)) 4348 return -EBADF; 4349 4350 if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg, 4351 sizeof(option))) 4352 return -EFAULT; 4353 4354 if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || 4355 option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || 4356 option.algorithm >= COMPRESS_MAX) 4357 return -EINVAL; 4358 4359 ret = mnt_want_write_file(filp); 4360 if (ret) 
4361 return ret; 4362 inode_lock(inode); 4363 4364 f2fs_down_write(&F2FS_I(inode)->i_sem); 4365 if (!f2fs_compressed_file(inode)) { 4366 ret = -EINVAL; 4367 goto out; 4368 } 4369 4370 if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { 4371 ret = -EBUSY; 4372 goto out; 4373 } 4374 4375 if (F2FS_HAS_BLOCKS(inode)) { 4376 ret = -EFBIG; 4377 goto out; 4378 } 4379 4380 fi->i_compress_algorithm = option.algorithm; 4381 fi->i_log_cluster_size = option.log_cluster_size; 4382 fi->i_cluster_size = BIT(option.log_cluster_size); 4383 /* Set default level */ 4384 if (fi->i_compress_algorithm == COMPRESS_ZSTD) 4385 fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; 4386 else 4387 fi->i_compress_level = 0; 4388 /* Adjust mount option level */ 4389 if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm && 4390 F2FS_OPTION(sbi).compress_level) 4391 fi->i_compress_level = F2FS_OPTION(sbi).compress_level; 4392 f2fs_mark_inode_dirty_sync(inode, true); 4393 4394 if (!f2fs_is_compress_backend_ready(inode)) 4395 f2fs_warn(sbi, "compression algorithm is successfully set, " 4396 "but current kernel doesn't support this algorithm."); 4397 out: 4398 f2fs_up_write(&fi->i_sem); 4399 inode_unlock(inode); 4400 mnt_drop_write_file(filp); 4401 4402 return ret; 4403 } 4404 4405 static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) 4406 { 4407 DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx); 4408 struct address_space *mapping = inode->i_mapping; 4409 struct folio *folio; 4410 pgoff_t redirty_idx = page_idx; 4411 int page_len = 0, ret = 0; 4412 4413 page_cache_ra_unbounded(&ractl, len, 0); 4414 4415 do { 4416 folio = read_cache_folio(mapping, page_idx, NULL, NULL); 4417 if (IS_ERR(folio)) { 4418 ret = PTR_ERR(folio); 4419 break; 4420 } 4421 page_len += folio_nr_pages(folio) - (page_idx - folio->index); 4422 page_idx = folio_next_index(folio); 4423 } while (page_len < len); 4424 4425 do { 4426 folio = filemap_lock_folio(mapping, redirty_idx); 4427 4428 /* It will never fail, when folio has pinned above */ 4429 f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(folio)); 4430 4431 f2fs_folio_wait_writeback(folio, DATA, true, true); 4432 4433 folio_mark_dirty(folio); 4434 folio_set_f2fs_gcing(folio); 4435 redirty_idx = folio_next_index(folio); 4436 folio_unlock(folio); 4437 folio_put_refs(folio, 2); 4438 } while (redirty_idx < page_idx); 4439 4440 return ret; 4441 } 4442 4443 static int f2fs_ioc_decompress_file(struct file *filp) 4444 { 4445 struct inode *inode = file_inode(filp); 4446 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4447 struct f2fs_inode_info *fi = F2FS_I(inode); 4448 pgoff_t page_idx = 0, last_idx, cluster_idx; 4449 int ret; 4450 4451 if (!f2fs_sb_has_compression(sbi) || 4452 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) 4453 return -EOPNOTSUPP; 4454 4455 if (!(filp->f_mode & FMODE_WRITE)) 4456 return -EBADF; 4457 4458 f2fs_balance_fs(sbi, true); 4459 4460 ret = mnt_want_write_file(filp); 4461 if (ret) 4462 return ret; 4463 inode_lock(inode); 4464 4465 if (!f2fs_is_compress_backend_ready(inode)) { 4466 ret = -EOPNOTSUPP; 4467 goto out; 4468 } 4469 4470 if (!f2fs_compressed_file(inode) || 4471 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4472 ret = -EINVAL; 4473 goto out; 4474 } 4475 4476 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 4477 if (ret) 4478 goto out; 4479 4480 if (!atomic_read(&fi->i_compr_blocks)) 4481 goto out; 4482 4483 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4484 last_idx >>= fi->i_log_cluster_size; 4485 4486 for 
(cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { 4487 page_idx = cluster_idx << fi->i_log_cluster_size; 4488 4489 if (!f2fs_is_compressed_cluster(inode, page_idx)) 4490 continue; 4491 4492 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); 4493 if (ret < 0) 4494 break; 4495 4496 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { 4497 ret = filemap_fdatawrite(inode->i_mapping); 4498 if (ret < 0) 4499 break; 4500 } 4501 4502 cond_resched(); 4503 if (fatal_signal_pending(current)) { 4504 ret = -EINTR; 4505 break; 4506 } 4507 } 4508 4509 if (!ret) 4510 ret = filemap_write_and_wait_range(inode->i_mapping, 0, 4511 LLONG_MAX); 4512 4513 if (ret) 4514 f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.", 4515 __func__, ret); 4516 f2fs_update_time(sbi, REQ_TIME); 4517 out: 4518 inode_unlock(inode); 4519 mnt_drop_write_file(filp); 4520 4521 return ret; 4522 } 4523 4524 static int f2fs_ioc_compress_file(struct file *filp) 4525 { 4526 struct inode *inode = file_inode(filp); 4527 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4528 struct f2fs_inode_info *fi = F2FS_I(inode); 4529 pgoff_t page_idx = 0, last_idx, cluster_idx; 4530 int ret; 4531 4532 if (!f2fs_sb_has_compression(sbi) || 4533 F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) 4534 return -EOPNOTSUPP; 4535 4536 if (!(filp->f_mode & FMODE_WRITE)) 4537 return -EBADF; 4538 4539 f2fs_balance_fs(sbi, true); 4540 4541 ret = mnt_want_write_file(filp); 4542 if (ret) 4543 return ret; 4544 inode_lock(inode); 4545 4546 if (!f2fs_is_compress_backend_ready(inode)) { 4547 ret = -EOPNOTSUPP; 4548 goto out; 4549 } 4550 4551 if (!f2fs_compressed_file(inode) || 4552 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { 4553 ret = -EINVAL; 4554 goto out; 4555 } 4556 4557 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 4558 if (ret) 4559 goto out; 4560 4561 set_inode_flag(inode, FI_ENABLE_COMPRESS); 4562 4563 last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 4564 last_idx >>= fi->i_log_cluster_size; 4565 4566 for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { 4567 page_idx = cluster_idx << fi->i_log_cluster_size; 4568 4569 if (f2fs_is_sparse_cluster(inode, page_idx)) 4570 continue; 4571 4572 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); 4573 if (ret < 0) 4574 break; 4575 4576 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { 4577 ret = filemap_fdatawrite(inode->i_mapping); 4578 if (ret < 0) 4579 break; 4580 } 4581 4582 cond_resched(); 4583 if (fatal_signal_pending(current)) { 4584 ret = -EINTR; 4585 break; 4586 } 4587 } 4588 4589 if (!ret) 4590 ret = filemap_write_and_wait_range(inode->i_mapping, 0, 4591 LLONG_MAX); 4592 4593 clear_inode_flag(inode, FI_ENABLE_COMPRESS); 4594 4595 if (ret) 4596 f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). 
Please delete the file.", 4597 __func__, ret); 4598 f2fs_update_time(sbi, REQ_TIME); 4599 out: 4600 inode_unlock(inode); 4601 mnt_drop_write_file(filp); 4602 4603 return ret; 4604 } 4605 4606 static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 4607 { 4608 switch (cmd) { 4609 case FS_IOC_GETVERSION: 4610 return f2fs_ioc_getversion(filp, arg); 4611 case F2FS_IOC_START_ATOMIC_WRITE: 4612 return f2fs_ioc_start_atomic_write(filp, false); 4613 case F2FS_IOC_START_ATOMIC_REPLACE: 4614 return f2fs_ioc_start_atomic_write(filp, true); 4615 case F2FS_IOC_COMMIT_ATOMIC_WRITE: 4616 return f2fs_ioc_commit_atomic_write(filp); 4617 case F2FS_IOC_ABORT_ATOMIC_WRITE: 4618 return f2fs_ioc_abort_atomic_write(filp); 4619 case F2FS_IOC_START_VOLATILE_WRITE: 4620 case F2FS_IOC_RELEASE_VOLATILE_WRITE: 4621 return -EOPNOTSUPP; 4622 case F2FS_IOC_SHUTDOWN: 4623 return f2fs_ioc_shutdown(filp, arg); 4624 case FITRIM: 4625 return f2fs_ioc_fitrim(filp, arg); 4626 case FS_IOC_SET_ENCRYPTION_POLICY: 4627 return f2fs_ioc_set_encryption_policy(filp, arg); 4628 case FS_IOC_GET_ENCRYPTION_POLICY: 4629 return f2fs_ioc_get_encryption_policy(filp, arg); 4630 case FS_IOC_GET_ENCRYPTION_PWSALT: 4631 return f2fs_ioc_get_encryption_pwsalt(filp, arg); 4632 case FS_IOC_GET_ENCRYPTION_POLICY_EX: 4633 return f2fs_ioc_get_encryption_policy_ex(filp, arg); 4634 case FS_IOC_ADD_ENCRYPTION_KEY: 4635 return f2fs_ioc_add_encryption_key(filp, arg); 4636 case FS_IOC_REMOVE_ENCRYPTION_KEY: 4637 return f2fs_ioc_remove_encryption_key(filp, arg); 4638 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: 4639 return f2fs_ioc_remove_encryption_key_all_users(filp, arg); 4640 case FS_IOC_GET_ENCRYPTION_KEY_STATUS: 4641 return f2fs_ioc_get_encryption_key_status(filp, arg); 4642 case FS_IOC_GET_ENCRYPTION_NONCE: 4643 return f2fs_ioc_get_encryption_nonce(filp, arg); 4644 case F2FS_IOC_GARBAGE_COLLECT: 4645 return f2fs_ioc_gc(filp, arg); 4646 case F2FS_IOC_GARBAGE_COLLECT_RANGE: 4647 return f2fs_ioc_gc_range(filp, arg); 4648 case F2FS_IOC_WRITE_CHECKPOINT: 4649 return f2fs_ioc_write_checkpoint(filp); 4650 case F2FS_IOC_DEFRAGMENT: 4651 return f2fs_ioc_defragment(filp, arg); 4652 case F2FS_IOC_MOVE_RANGE: 4653 return f2fs_ioc_move_range(filp, arg); 4654 case F2FS_IOC_FLUSH_DEVICE: 4655 return f2fs_ioc_flush_device(filp, arg); 4656 case F2FS_IOC_GET_FEATURES: 4657 return f2fs_ioc_get_features(filp, arg); 4658 case F2FS_IOC_GET_PIN_FILE: 4659 return f2fs_ioc_get_pin_file(filp, arg); 4660 case F2FS_IOC_SET_PIN_FILE: 4661 return f2fs_ioc_set_pin_file(filp, arg); 4662 case F2FS_IOC_PRECACHE_EXTENTS: 4663 return f2fs_ioc_precache_extents(filp); 4664 case F2FS_IOC_RESIZE_FS: 4665 return f2fs_ioc_resize_fs(filp, arg); 4666 case FS_IOC_ENABLE_VERITY: 4667 return f2fs_ioc_enable_verity(filp, arg); 4668 case FS_IOC_MEASURE_VERITY: 4669 return f2fs_ioc_measure_verity(filp, arg); 4670 case FS_IOC_READ_VERITY_METADATA: 4671 return f2fs_ioc_read_verity_metadata(filp, arg); 4672 case FS_IOC_GETFSLABEL: 4673 return f2fs_ioc_getfslabel(filp, arg); 4674 case FS_IOC_SETFSLABEL: 4675 return f2fs_ioc_setfslabel(filp, arg); 4676 case F2FS_IOC_GET_COMPRESS_BLOCKS: 4677 return f2fs_ioc_get_compress_blocks(filp, arg); 4678 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: 4679 return f2fs_release_compress_blocks(filp, arg); 4680 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: 4681 return f2fs_reserve_compress_blocks(filp, arg); 4682 case F2FS_IOC_SEC_TRIM_FILE: 4683 return f2fs_sec_trim_file(filp, arg); 4684 case F2FS_IOC_GET_COMPRESS_OPTION: 4685 return 
f2fs_ioc_get_compress_option(filp, arg); 4686 case F2FS_IOC_SET_COMPRESS_OPTION: 4687 return f2fs_ioc_set_compress_option(filp, arg); 4688 case F2FS_IOC_DECOMPRESS_FILE: 4689 return f2fs_ioc_decompress_file(filp); 4690 case F2FS_IOC_COMPRESS_FILE: 4691 return f2fs_ioc_compress_file(filp); 4692 case F2FS_IOC_GET_DEV_ALIAS_FILE: 4693 return f2fs_ioc_get_dev_alias_file(filp, arg); 4694 case F2FS_IOC_IO_PRIO: 4695 return f2fs_ioc_io_prio(filp, arg); 4696 default: 4697 return -ENOTTY; 4698 } 4699 } 4700 4701 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 4702 { 4703 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) 4704 return -EIO; 4705 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp)))) 4706 return -ENOSPC; 4707 4708 return __f2fs_ioctl(filp, cmd, arg); 4709 } 4710 4711 /* 4712 * Return %true if the given read or write request should use direct I/O, or 4713 * %false if it should use buffered I/O. 4714 */ 4715 static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb, 4716 struct iov_iter *iter) 4717 { 4718 unsigned int align; 4719 4720 if (!(iocb->ki_flags & IOCB_DIRECT)) 4721 return false; 4722 4723 if (f2fs_force_buffered_io(inode, iov_iter_rw(iter))) 4724 return false; 4725 4726 /* 4727 * Direct I/O not aligned to the disk's logical_block_size will be 4728 * attempted, but will fail with -EINVAL. 4729 * 4730 * f2fs additionally requires that direct I/O be aligned to the 4731 * filesystem block size, which is often a stricter requirement. 4732 * However, f2fs traditionally falls back to buffered I/O on requests 4733 * that are logical_block_size-aligned but not fs-block aligned. 4734 * 4735 * The below logic implements this behavior. 4736 */ 4737 align = iocb->ki_pos | iov_iter_alignment(iter); 4738 if (!IS_ALIGNED(align, i_blocksize(inode)) && 4739 IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev))) 4740 return false; 4741 4742 return true; 4743 } 4744 4745 static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error, 4746 unsigned int flags) 4747 { 4748 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); 4749 4750 dec_page_count(sbi, F2FS_DIO_READ); 4751 if (error) 4752 return error; 4753 f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size); 4754 return 0; 4755 } 4756 4757 static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = { 4758 .end_io = f2fs_dio_read_end_io, 4759 }; 4760 4761 static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) 4762 { 4763 struct file *file = iocb->ki_filp; 4764 struct inode *inode = file_inode(file); 4765 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4766 struct f2fs_inode_info *fi = F2FS_I(inode); 4767 const loff_t pos = iocb->ki_pos; 4768 const size_t count = iov_iter_count(to); 4769 struct iomap_dio *dio; 4770 ssize_t ret; 4771 4772 if (count == 0) 4773 return 0; /* skip atime update */ 4774 4775 trace_f2fs_direct_IO_enter(inode, iocb, count, READ); 4776 4777 if (iocb->ki_flags & IOCB_NOWAIT) { 4778 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { 4779 ret = -EAGAIN; 4780 goto out; 4781 } 4782 } else { 4783 f2fs_down_read(&fi->i_gc_rwsem[READ]); 4784 } 4785 4786 /* dio is not compatible w/ atomic file */ 4787 if (f2fs_is_atomic_file(inode)) { 4788 f2fs_up_read(&fi->i_gc_rwsem[READ]); 4789 ret = -EOPNOTSUPP; 4790 goto out; 4791 } 4792 4793 /* 4794 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of 4795 * the higher-level function iomap_dio_rw() in order to ensure that the 4796 * F2FS_DIO_READ counter will be 
decremented correctly in all cases. 4797 */ 4798 inc_page_count(sbi, F2FS_DIO_READ); 4799 dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops, 4800 &f2fs_iomap_dio_read_ops, 0, NULL, 0); 4801 if (IS_ERR_OR_NULL(dio)) { 4802 ret = PTR_ERR_OR_ZERO(dio); 4803 if (ret != -EIOCBQUEUED) 4804 dec_page_count(sbi, F2FS_DIO_READ); 4805 } else { 4806 ret = iomap_dio_complete(dio); 4807 } 4808 4809 f2fs_up_read(&fi->i_gc_rwsem[READ]); 4810 4811 file_accessed(file); 4812 out: 4813 trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret); 4814 return ret; 4815 } 4816 4817 static void f2fs_trace_rw_file_path(struct file *file, loff_t pos, size_t count, 4818 int rw) 4819 { 4820 struct inode *inode = file_inode(file); 4821 char *buf, *path; 4822 4823 buf = f2fs_getname(F2FS_I_SB(inode)); 4824 if (!buf) 4825 return; 4826 path = dentry_path_raw(file_dentry(file), buf, PATH_MAX); 4827 if (IS_ERR(path)) 4828 goto free_buf; 4829 if (rw == WRITE) 4830 trace_f2fs_datawrite_start(inode, pos, count, 4831 current->pid, path, current->comm); 4832 else 4833 trace_f2fs_dataread_start(inode, pos, count, 4834 current->pid, path, current->comm); 4835 free_buf: 4836 f2fs_putname(buf); 4837 } 4838 4839 static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 4840 { 4841 struct inode *inode = file_inode(iocb->ki_filp); 4842 const loff_t pos = iocb->ki_pos; 4843 ssize_t ret; 4844 bool dio; 4845 4846 if (!f2fs_is_compress_backend_ready(inode)) 4847 return -EOPNOTSUPP; 4848 4849 if (trace_f2fs_dataread_start_enabled()) 4850 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, 4851 iov_iter_count(to), READ); 4852 4853 dio = f2fs_should_use_dio(inode, iocb, to); 4854 4855 /* In LFS mode, if there is inflight dio, wait for its completion */ 4856 if (f2fs_lfs_mode(F2FS_I_SB(inode)) && 4857 get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && 4858 (!f2fs_is_pinned_file(inode) || !dio)) 4859 inode_dio_wait(inode); 4860 4861 if (dio) { 4862 ret = f2fs_dio_read_iter(iocb, to); 4863 } else { 4864 ret = filemap_read(iocb, to, 0); 4865 if (ret > 0) 4866 f2fs_update_iostat(F2FS_I_SB(inode), inode, 4867 APP_BUFFERED_READ_IO, ret); 4868 } 4869 trace_f2fs_dataread_end(inode, pos, ret); 4870 return ret; 4871 } 4872 4873 static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos, 4874 struct pipe_inode_info *pipe, 4875 size_t len, unsigned int flags) 4876 { 4877 struct inode *inode = file_inode(in); 4878 const loff_t pos = *ppos; 4879 ssize_t ret; 4880 4881 if (!f2fs_is_compress_backend_ready(inode)) 4882 return -EOPNOTSUPP; 4883 4884 if (trace_f2fs_dataread_start_enabled()) 4885 f2fs_trace_rw_file_path(in, pos, len, READ); 4886 4887 ret = filemap_splice_read(in, ppos, pipe, len, flags); 4888 if (ret > 0) 4889 f2fs_update_iostat(F2FS_I_SB(inode), inode, 4890 APP_BUFFERED_READ_IO, ret); 4891 4892 trace_f2fs_dataread_end(inode, pos, ret); 4893 return ret; 4894 } 4895 4896 static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from) 4897 { 4898 struct file *file = iocb->ki_filp; 4899 struct inode *inode = file_inode(file); 4900 ssize_t count; 4901 int err; 4902 4903 if (IS_IMMUTABLE(inode)) 4904 return -EPERM; 4905 4906 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) 4907 return -EPERM; 4908 4909 count = generic_write_checks(iocb, from); 4910 if (count <= 0) 4911 return count; 4912 4913 err = file_modified(file); 4914 if (err) 4915 return err; 4916 4917 filemap_invalidate_lock(inode->i_mapping); 4918 f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from)); 4919 
filemap_invalidate_unlock(inode->i_mapping); 4920 return count; 4921 } 4922 4923 /* 4924 * Preallocate blocks for a write request, if it is possible and helpful to do 4925 * so. Returns a positive number if blocks may have been preallocated, 0 if no 4926 * blocks were preallocated, or a negative errno value if something went 4927 * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the 4928 * requested blocks (not just some of them) have been allocated. 4929 */ 4930 static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, 4931 bool dio) 4932 { 4933 struct inode *inode = file_inode(iocb->ki_filp); 4934 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 4935 const loff_t pos = iocb->ki_pos; 4936 const size_t count = iov_iter_count(iter); 4937 struct f2fs_map_blocks map = {}; 4938 int flag; 4939 int ret; 4940 4941 /* If it will be an out-of-place direct write, don't bother. */ 4942 if (dio && f2fs_lfs_mode(sbi)) 4943 return 0; 4944 /* 4945 * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into 4946 * buffered IO, if DIO meets any holes. 4947 */ 4948 if (dio && i_size_read(inode) && 4949 (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode)))) 4950 return 0; 4951 4952 /* No-wait I/O can't allocate blocks. */ 4953 if (iocb->ki_flags & IOCB_NOWAIT) 4954 return 0; 4955 4956 /* If it will be a short write, don't bother. */ 4957 if (fault_in_iov_iter_readable(iter, count)) 4958 return 0; 4959 4960 if (f2fs_has_inline_data(inode)) { 4961 /* If the data will fit inline, don't bother. */ 4962 if (pos + count <= MAX_INLINE_DATA(inode)) 4963 return 0; 4964 ret = f2fs_convert_inline_inode(inode); 4965 if (ret) 4966 return ret; 4967 } 4968 4969 /* Do not preallocate blocks that will be written partially in 4KB. */ 4970 map.m_lblk = F2FS_BLK_ALIGN(pos); 4971 map.m_len = F2FS_BYTES_TO_BLK(pos + count); 4972 if (map.m_len > map.m_lblk) 4973 map.m_len -= map.m_lblk; 4974 else 4975 return 0; 4976 4977 if (!IS_DEVICE_ALIASING(inode)) 4978 map.m_may_create = true; 4979 if (dio) { 4980 map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, 4981 inode->i_write_hint); 4982 flag = F2FS_GET_BLOCK_PRE_DIO; 4983 } else { 4984 map.m_seg_type = NO_CHECK_TYPE; 4985 flag = F2FS_GET_BLOCK_PRE_AIO; 4986 } 4987 4988 ret = f2fs_map_blocks(inode, &map, flag); 4989 /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. 
*/ 4990 if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0)) 4991 return ret; 4992 if (ret == 0) 4993 set_inode_flag(inode, FI_PREALLOCATED_ALL); 4994 return map.m_len; 4995 } 4996 4997 static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb, 4998 struct iov_iter *from) 4999 { 5000 struct file *file = iocb->ki_filp; 5001 struct inode *inode = file_inode(file); 5002 ssize_t ret; 5003 5004 if (iocb->ki_flags & IOCB_NOWAIT) 5005 return -EOPNOTSUPP; 5006 5007 ret = generic_perform_write(iocb, from); 5008 5009 if (ret > 0) { 5010 f2fs_update_iostat(F2FS_I_SB(inode), inode, 5011 APP_BUFFERED_IO, ret); 5012 } 5013 return ret; 5014 } 5015 5016 static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, 5017 unsigned int flags) 5018 { 5019 struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); 5020 5021 dec_page_count(sbi, F2FS_DIO_WRITE); 5022 if (error) 5023 return error; 5024 f2fs_update_time(sbi, REQ_TIME); 5025 f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size); 5026 return 0; 5027 } 5028 5029 static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, 5030 struct bio *bio, loff_t file_offset) 5031 { 5032 struct inode *inode = iter->inode; 5033 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 5034 enum log_type type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint); 5035 enum temp_type temp = f2fs_get_segment_temp(sbi, type); 5036 5037 bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp); 5038 submit_bio(bio); 5039 } 5040 5041 static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = { 5042 .end_io = f2fs_dio_write_end_io, 5043 .submit_io = f2fs_dio_write_submit_io, 5044 }; 5045 5046 static void f2fs_flush_buffered_write(struct address_space *mapping, 5047 loff_t start_pos, loff_t end_pos) 5048 { 5049 int ret; 5050 5051 ret = filemap_write_and_wait_range(mapping, start_pos, end_pos); 5052 if (ret < 0) 5053 return; 5054 invalidate_mapping_pages(mapping, 5055 start_pos >> PAGE_SHIFT, 5056 end_pos >> PAGE_SHIFT); 5057 } 5058 5059 static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, 5060 bool *may_need_sync) 5061 { 5062 struct file *file = iocb->ki_filp; 5063 struct inode *inode = file_inode(file); 5064 struct f2fs_inode_info *fi = F2FS_I(inode); 5065 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 5066 const bool do_opu = f2fs_lfs_mode(sbi); 5067 const loff_t pos = iocb->ki_pos; 5068 const ssize_t count = iov_iter_count(from); 5069 unsigned int dio_flags; 5070 struct iomap_dio *dio; 5071 ssize_t ret; 5072 5073 trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE); 5074 5075 if (iocb->ki_flags & IOCB_NOWAIT) { 5076 /* f2fs_convert_inline_inode() and block allocation can block */ 5077 if (f2fs_has_inline_data(inode) || 5078 !f2fs_overwrite_io(inode, pos, count)) { 5079 ret = -EAGAIN; 5080 goto out; 5081 } 5082 5083 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) { 5084 ret = -EAGAIN; 5085 goto out; 5086 } 5087 if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { 5088 f2fs_up_read(&fi->i_gc_rwsem[WRITE]); 5089 ret = -EAGAIN; 5090 goto out; 5091 } 5092 } else { 5093 ret = f2fs_convert_inline_inode(inode); 5094 if (ret) 5095 goto out; 5096 5097 f2fs_down_read(&fi->i_gc_rwsem[WRITE]); 5098 if (do_opu) 5099 f2fs_down_read(&fi->i_gc_rwsem[READ]); 5100 } 5101 5102 /* 5103 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of 5104 * the higher-level function iomap_dio_rw() in order to ensure that the 5105 * F2FS_DIO_WRITE counter will be decremented correctly in all cases. 
5106 */ 5107 inc_page_count(sbi, F2FS_DIO_WRITE); 5108 dio_flags = 0; 5109 if (pos + count > inode->i_size) 5110 dio_flags |= IOMAP_DIO_FORCE_WAIT; 5111 dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops, 5112 &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0); 5113 if (IS_ERR_OR_NULL(dio)) { 5114 ret = PTR_ERR_OR_ZERO(dio); 5115 if (ret == -ENOTBLK) 5116 ret = 0; 5117 if (ret != -EIOCBQUEUED) 5118 dec_page_count(sbi, F2FS_DIO_WRITE); 5119 } else { 5120 ret = iomap_dio_complete(dio); 5121 } 5122 5123 if (do_opu) 5124 f2fs_up_read(&fi->i_gc_rwsem[READ]); 5125 f2fs_up_read(&fi->i_gc_rwsem[WRITE]); 5126 5127 if (ret < 0) 5128 goto out; 5129 if (pos + ret > inode->i_size) 5130 f2fs_i_size_write(inode, pos + ret); 5131 if (!do_opu) 5132 set_inode_flag(inode, FI_UPDATE_WRITE); 5133 5134 if (iov_iter_count(from)) { 5135 ssize_t ret2; 5136 loff_t bufio_start_pos = iocb->ki_pos; 5137 5138 /* 5139 * The direct write was partial, so we need to fall back to a 5140 * buffered write for the remainder. 5141 */ 5142 5143 ret2 = f2fs_buffered_write_iter(iocb, from); 5144 if (iov_iter_count(from)) 5145 f2fs_write_failed(inode, iocb->ki_pos); 5146 if (ret2 < 0) 5147 goto out; 5148 5149 /* 5150 * Ensure that the pagecache pages are written to disk and 5151 * invalidated to preserve the expected O_DIRECT semantics. 5152 */ 5153 if (ret2 > 0) { 5154 loff_t bufio_end_pos = bufio_start_pos + ret2 - 1; 5155 5156 ret += ret2; 5157 5158 f2fs_flush_buffered_write(file->f_mapping, 5159 bufio_start_pos, 5160 bufio_end_pos); 5161 } 5162 } else { 5163 /* iomap_dio_rw() already handled the generic_write_sync(). */ 5164 *may_need_sync = false; 5165 } 5166 out: 5167 trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret); 5168 return ret; 5169 } 5170 5171 static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 5172 { 5173 struct inode *inode = file_inode(iocb->ki_filp); 5174 const loff_t orig_pos = iocb->ki_pos; 5175 const size_t orig_count = iov_iter_count(from); 5176 loff_t target_size; 5177 bool dio; 5178 bool may_need_sync = true; 5179 int preallocated; 5180 const loff_t pos = iocb->ki_pos; 5181 const ssize_t count = iov_iter_count(from); 5182 ssize_t ret; 5183 5184 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { 5185 ret = -EIO; 5186 goto out; 5187 } 5188 5189 if (!f2fs_is_compress_backend_ready(inode)) { 5190 ret = -EOPNOTSUPP; 5191 goto out; 5192 } 5193 5194 if (iocb->ki_flags & IOCB_NOWAIT) { 5195 if (!inode_trylock(inode)) { 5196 ret = -EAGAIN; 5197 goto out; 5198 } 5199 } else { 5200 inode_lock(inode); 5201 } 5202 5203 if (f2fs_is_pinned_file(inode) && 5204 !f2fs_overwrite_io(inode, pos, count)) { 5205 ret = -EIO; 5206 goto out_unlock; 5207 } 5208 5209 ret = f2fs_write_checks(iocb, from); 5210 if (ret <= 0) 5211 goto out_unlock; 5212 5213 /* Determine whether we will do a direct write or a buffered write. */ 5214 dio = f2fs_should_use_dio(inode, iocb, from); 5215 5216 /* dio is not compatible w/ atomic write */ 5217 if (dio && f2fs_is_atomic_file(inode)) { 5218 ret = -EOPNOTSUPP; 5219 goto out_unlock; 5220 } 5221 5222 /* Possibly preallocate the blocks for the write. */ 5223 target_size = iocb->ki_pos + iov_iter_count(from); 5224 preallocated = f2fs_preallocate_blocks(iocb, from, dio); 5225 if (preallocated < 0) { 5226 ret = preallocated; 5227 } else { 5228 if (trace_f2fs_datawrite_start_enabled()) 5229 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, 5230 orig_count, WRITE); 5231 5232 /* Do the actual write. */ 5233 ret = dio ? 
5234 f2fs_dio_write_iter(iocb, from, &may_need_sync) : 5235 f2fs_buffered_write_iter(iocb, from); 5236 5237 trace_f2fs_datawrite_end(inode, orig_pos, ret); 5238 } 5239 5240 /* Don't leave any preallocated blocks around past i_size. */ 5241 if (preallocated && i_size_read(inode) < target_size) { 5242 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 5243 filemap_invalidate_lock(inode->i_mapping); 5244 if (!f2fs_truncate(inode)) 5245 file_dont_truncate(inode); 5246 filemap_invalidate_unlock(inode->i_mapping); 5247 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 5248 } else { 5249 file_dont_truncate(inode); 5250 } 5251 5252 clear_inode_flag(inode, FI_PREALLOCATED_ALL); 5253 out_unlock: 5254 inode_unlock(inode); 5255 out: 5256 trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret); 5257 5258 if (ret > 0 && may_need_sync) 5259 ret = generic_write_sync(iocb, ret); 5260 5261 /* If buffered IO was forced, flush and drop the data from 5262 * the page cache to preserve O_DIRECT semantics 5263 */ 5264 if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT)) 5265 f2fs_flush_buffered_write(iocb->ki_filp->f_mapping, 5266 orig_pos, 5267 orig_pos + ret - 1); 5268 5269 return ret; 5270 } 5271 5272 static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len, 5273 int advice) 5274 { 5275 struct address_space *mapping; 5276 struct backing_dev_info *bdi; 5277 struct inode *inode = file_inode(filp); 5278 int err; 5279 5280 if (advice == POSIX_FADV_SEQUENTIAL) { 5281 if (S_ISFIFO(inode->i_mode)) 5282 return -ESPIPE; 5283 5284 mapping = filp->f_mapping; 5285 if (!mapping || len < 0) 5286 return -EINVAL; 5287 5288 bdi = inode_to_bdi(mapping->host); 5289 filp->f_ra.ra_pages = bdi->ra_pages * 5290 F2FS_I_SB(inode)->seq_file_ra_mul; 5291 spin_lock(&filp->f_lock); 5292 filp->f_mode &= ~FMODE_RANDOM; 5293 spin_unlock(&filp->f_lock); 5294 return 0; 5295 } else if (advice == POSIX_FADV_WILLNEED && offset == 0) { 5296 /* Load extent cache at the first readahead. 
*/ 5297 f2fs_precache_extents(inode); 5298 } 5299 5300 err = generic_fadvise(filp, offset, len, advice); 5301 if (err) 5302 return err; 5303 5304 if (advice == POSIX_FADV_DONTNEED && 5305 (test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) && 5306 f2fs_compressed_file(inode))) 5307 f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino); 5308 else if (advice == POSIX_FADV_NOREUSE) 5309 err = f2fs_keep_noreuse_range(inode, offset, len); 5310 return err; 5311 } 5312 5313 #ifdef CONFIG_COMPAT 5314 struct compat_f2fs_gc_range { 5315 u32 sync; 5316 compat_u64 start; 5317 compat_u64 len; 5318 }; 5319 #define F2FS_IOC32_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11,\ 5320 struct compat_f2fs_gc_range) 5321 5322 static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg) 5323 { 5324 struct compat_f2fs_gc_range __user *urange; 5325 struct f2fs_gc_range range; 5326 int err; 5327 5328 urange = compat_ptr(arg); 5329 err = get_user(range.sync, &urange->sync); 5330 err |= get_user(range.start, &urange->start); 5331 err |= get_user(range.len, &urange->len); 5332 if (err) 5333 return -EFAULT; 5334 5335 return __f2fs_ioc_gc_range(file, &range); 5336 } 5337 5338 struct compat_f2fs_move_range { 5339 u32 dst_fd; 5340 compat_u64 pos_in; 5341 compat_u64 pos_out; 5342 compat_u64 len; 5343 }; 5344 #define F2FS_IOC32_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ 5345 struct compat_f2fs_move_range) 5346 5347 static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg) 5348 { 5349 struct compat_f2fs_move_range __user *urange; 5350 struct f2fs_move_range range; 5351 int err; 5352 5353 urange = compat_ptr(arg); 5354 err = get_user(range.dst_fd, &urange->dst_fd); 5355 err |= get_user(range.pos_in, &urange->pos_in); 5356 err |= get_user(range.pos_out, &urange->pos_out); 5357 err |= get_user(range.len, &urange->len); 5358 if (err) 5359 return -EFAULT; 5360 5361 return __f2fs_ioc_move_range(file, &range); 5362 } 5363 5364 long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 5365 { 5366 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) 5367 return -EIO; 5368 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file)))) 5369 return -ENOSPC; 5370 5371 switch (cmd) { 5372 case FS_IOC32_GETVERSION: 5373 cmd = FS_IOC_GETVERSION; 5374 break; 5375 case F2FS_IOC32_GARBAGE_COLLECT_RANGE: 5376 return f2fs_compat_ioc_gc_range(file, arg); 5377 case F2FS_IOC32_MOVE_RANGE: 5378 return f2fs_compat_ioc_move_range(file, arg); 5379 case F2FS_IOC_START_ATOMIC_WRITE: 5380 case F2FS_IOC_START_ATOMIC_REPLACE: 5381 case F2FS_IOC_COMMIT_ATOMIC_WRITE: 5382 case F2FS_IOC_START_VOLATILE_WRITE: 5383 case F2FS_IOC_RELEASE_VOLATILE_WRITE: 5384 case F2FS_IOC_ABORT_ATOMIC_WRITE: 5385 case F2FS_IOC_SHUTDOWN: 5386 case FITRIM: 5387 case FS_IOC_SET_ENCRYPTION_POLICY: 5388 case FS_IOC_GET_ENCRYPTION_PWSALT: 5389 case FS_IOC_GET_ENCRYPTION_POLICY: 5390 case FS_IOC_GET_ENCRYPTION_POLICY_EX: 5391 case FS_IOC_ADD_ENCRYPTION_KEY: 5392 case FS_IOC_REMOVE_ENCRYPTION_KEY: 5393 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: 5394 case FS_IOC_GET_ENCRYPTION_KEY_STATUS: 5395 case FS_IOC_GET_ENCRYPTION_NONCE: 5396 case F2FS_IOC_GARBAGE_COLLECT: 5397 case F2FS_IOC_WRITE_CHECKPOINT: 5398 case F2FS_IOC_DEFRAGMENT: 5399 case F2FS_IOC_FLUSH_DEVICE: 5400 case F2FS_IOC_GET_FEATURES: 5401 case F2FS_IOC_GET_PIN_FILE: 5402 case F2FS_IOC_SET_PIN_FILE: 5403 case F2FS_IOC_PRECACHE_EXTENTS: 5404 case F2FS_IOC_RESIZE_FS: 5405 case FS_IOC_ENABLE_VERITY: 5406 case FS_IOC_MEASURE_VERITY: 5407 case FS_IOC_READ_VERITY_METADATA: 5408 case 
FS_IOC_GETFSLABEL: 5409 case FS_IOC_SETFSLABEL: 5410 case F2FS_IOC_GET_COMPRESS_BLOCKS: 5411 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: 5412 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: 5413 case F2FS_IOC_SEC_TRIM_FILE: 5414 case F2FS_IOC_GET_COMPRESS_OPTION: 5415 case F2FS_IOC_SET_COMPRESS_OPTION: 5416 case F2FS_IOC_DECOMPRESS_FILE: 5417 case F2FS_IOC_COMPRESS_FILE: 5418 case F2FS_IOC_GET_DEV_ALIAS_FILE: 5419 case F2FS_IOC_IO_PRIO: 5420 break; 5421 default: 5422 return -ENOIOCTLCMD; 5423 } 5424 return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); 5425 } 5426 #endif 5427 5428 const struct file_operations f2fs_file_operations = { 5429 .llseek = f2fs_llseek, 5430 .read_iter = f2fs_file_read_iter, 5431 .write_iter = f2fs_file_write_iter, 5432 .iopoll = iocb_bio_iopoll, 5433 .open = f2fs_file_open, 5434 .release = f2fs_release_file, 5435 .mmap_prepare = f2fs_file_mmap_prepare, 5436 .flush = f2fs_file_flush, 5437 .fsync = f2fs_sync_file, 5438 .fallocate = f2fs_fallocate, 5439 .unlocked_ioctl = f2fs_ioctl, 5440 #ifdef CONFIG_COMPAT 5441 .compat_ioctl = f2fs_compat_ioctl, 5442 #endif 5443 .splice_read = f2fs_file_splice_read, 5444 .splice_write = iter_file_splice_write, 5445 .fadvise = f2fs_file_fadvise, 5446 .fop_flags = FOP_BUFFER_RASYNC, 5447 }; 5448