1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * fs/f2fs/node.c 4 * 5 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 6 * http://www.samsung.com/ 7 */ 8 #include <linux/fs.h> 9 #include <linux/f2fs_fs.h> 10 #include <linux/mpage.h> 11 #include <linux/sched/mm.h> 12 #include <linux/blkdev.h> 13 #include <linux/pagevec.h> 14 #include <linux/swap.h> 15 16 #include "f2fs.h" 17 #include "node.h" 18 #include "segment.h" 19 #include "xattr.h" 20 #include "iostat.h" 21 #include <trace/events/f2fs.h> 22 23 #define on_f2fs_build_free_nids(nm_i) mutex_is_locked(&(nm_i)->build_lock) 24 25 static struct kmem_cache *nat_entry_slab; 26 static struct kmem_cache *free_nid_slab; 27 static struct kmem_cache *nat_entry_set_slab; 28 static struct kmem_cache *fsync_node_entry_slab; 29 30 /* 31 * Check whether the given nid is within node id range. 32 */ 33 int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) 34 { 35 if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) { 36 set_sbi_flag(sbi, SBI_NEED_FSCK); 37 f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.", 38 __func__, nid); 39 f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); 40 return -EFSCORRUPTED; 41 } 42 return 0; 43 } 44 45 bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) 46 { 47 struct f2fs_nm_info *nm_i = NM_I(sbi); 48 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 49 struct sysinfo val; 50 unsigned long avail_ram; 51 unsigned long mem_size = 0; 52 bool res = false; 53 54 if (!nm_i) 55 return true; 56 57 si_meminfo(&val); 58 59 /* only uses low memory */ 60 avail_ram = val.totalram - val.totalhigh; 61 62 /* 63 * give 25%, 25%, 50%, 50%, 25%, 25% memory for each components respectively 64 */ 65 if (type == FREE_NIDS) { 66 mem_size = (nm_i->nid_cnt[FREE_NID] * 67 sizeof(struct free_nid)) >> PAGE_SHIFT; 68 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); 69 } else if (type == NAT_ENTRIES) { 70 mem_size = (nm_i->nat_cnt[TOTAL_NAT] * 71 sizeof(struct nat_entry)) >> PAGE_SHIFT; 72 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); 73 if (excess_cached_nats(sbi)) 74 res = false; 75 } else if (type == DIRTY_DENTS) { 76 if (sbi->sb->s_bdi->wb.dirty_exceeded) 77 return false; 78 mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); 79 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 80 } else if (type == INO_ENTRIES) { 81 int i; 82 83 for (i = 0; i < MAX_INO_ENTRY; i++) 84 mem_size += sbi->im[i].ino_num * 85 sizeof(struct ino_entry); 86 mem_size >>= PAGE_SHIFT; 87 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 88 } else if (type == READ_EXTENT_CACHE || type == AGE_EXTENT_CACHE) { 89 enum extent_type etype = type == READ_EXTENT_CACHE ? 90 EX_READ : EX_BLOCK_AGE; 91 struct extent_tree_info *eti = &sbi->extent_tree[etype]; 92 93 mem_size = (atomic_read(&eti->total_ext_tree) * 94 sizeof(struct extent_tree) + 95 atomic_read(&eti->total_ext_node) * 96 sizeof(struct extent_node)) >> PAGE_SHIFT; 97 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); 98 } else if (type == DISCARD_CACHE) { 99 mem_size = (atomic_read(&dcc->discard_cmd_cnt) * 100 sizeof(struct discard_cmd)) >> PAGE_SHIFT; 101 res = mem_size < (avail_ram * nm_i->ram_thresh / 100); 102 } else if (type == COMPRESS_PAGE) { 103 #ifdef CONFIG_F2FS_FS_COMPRESSION 104 unsigned long free_ram = val.freeram; 105 106 /* 107 * free memory is lower than watermark or cached page count 108 * exceed threshold, deny caching compress page. 
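* In other words: a compressed page is cached only while free_ram stays above compress_watermark percent of low memory and the compress cache mapping holds fewer pages than compress_percent of free_ram (see the condition right below).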
109 */ 110 res = (free_ram > avail_ram * sbi->compress_watermark / 100) && 111 (COMPRESS_MAPPING(sbi)->nrpages < 112 free_ram * sbi->compress_percent / 100); 113 #else 114 res = false; 115 #endif 116 } else { 117 if (!sbi->sb->s_bdi->wb.dirty_exceeded) 118 return true; 119 } 120 return res; 121 } 122 123 static void clear_node_folio_dirty(struct folio *folio) 124 { 125 if (folio_test_dirty(folio)) { 126 f2fs_clear_page_cache_dirty_tag(folio); 127 folio_clear_dirty_for_io(folio); 128 dec_page_count(F2FS_F_SB(folio), F2FS_DIRTY_NODES); 129 } 130 folio_clear_uptodate(folio); 131 } 132 133 static struct folio *get_current_nat_folio(struct f2fs_sb_info *sbi, nid_t nid) 134 { 135 return f2fs_get_meta_folio_retry(sbi, current_nat_addr(sbi, nid)); 136 } 137 138 static struct folio *get_next_nat_folio(struct f2fs_sb_info *sbi, nid_t nid) 139 { 140 struct folio *src_folio; 141 struct folio *dst_folio; 142 pgoff_t dst_off; 143 void *src_addr; 144 void *dst_addr; 145 struct f2fs_nm_info *nm_i = NM_I(sbi); 146 147 dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid)); 148 149 /* get current nat block page with lock */ 150 src_folio = get_current_nat_folio(sbi, nid); 151 if (IS_ERR(src_folio)) 152 return src_folio; 153 dst_folio = f2fs_grab_meta_folio(sbi, dst_off); 154 f2fs_bug_on(sbi, folio_test_dirty(src_folio)); 155 156 src_addr = folio_address(src_folio); 157 dst_addr = folio_address(dst_folio); 158 memcpy(dst_addr, src_addr, PAGE_SIZE); 159 folio_mark_dirty(dst_folio); 160 f2fs_folio_put(src_folio, true); 161 162 set_to_next_nat(nm_i, nid); 163 164 return dst_folio; 165 } 166 167 static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi, 168 nid_t nid, bool no_fail) 169 { 170 struct nat_entry *new; 171 172 new = f2fs_kmem_cache_alloc(nat_entry_slab, 173 GFP_F2FS_ZERO, no_fail, sbi); 174 if (new) { 175 nat_set_nid(new, nid); 176 nat_reset_flag(new); 177 } 178 return new; 179 } 180 181 static void __free_nat_entry(struct nat_entry *e) 182 { 183 kmem_cache_free(nat_entry_slab, e); 184 } 185 186 /* must be locked by nat_tree_lock */ 187 static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, 188 struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail, bool init_dirty) 189 { 190 if (no_fail) 191 f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne); 192 else if (radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne)) 193 return NULL; 194 195 if (raw_ne) 196 node_info_from_raw_nat(&ne->ni, raw_ne); 197 198 if (init_dirty) { 199 INIT_LIST_HEAD(&ne->list); 200 nm_i->nat_cnt[TOTAL_NAT]++; 201 return ne; 202 } 203 204 spin_lock(&nm_i->nat_list_lock); 205 list_add_tail(&ne->list, &nm_i->nat_entries); 206 spin_unlock(&nm_i->nat_list_lock); 207 208 nm_i->nat_cnt[TOTAL_NAT]++; 209 nm_i->nat_cnt[RECLAIMABLE_NAT]++; 210 return ne; 211 } 212 213 static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n, bool for_dirty) 214 { 215 struct nat_entry *ne; 216 217 ne = radix_tree_lookup(&nm_i->nat_root, n); 218 219 /* 220 * for recent accessed nat entry which will not be dirtied soon 221 * later, move it to tail of lru list. 
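* Callers that pass for_dirty == true are about to dirty the entry anyway, so the LRU move is skipped for them.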
222 */ 223 if (ne && !get_nat_flag(ne, IS_DIRTY) && !for_dirty) { 224 spin_lock(&nm_i->nat_list_lock); 225 if (!list_empty(&ne->list)) 226 list_move_tail(&ne->list, &nm_i->nat_entries); 227 spin_unlock(&nm_i->nat_list_lock); 228 } 229 230 return ne; 231 } 232 233 static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, 234 nid_t start, unsigned int nr, struct nat_entry **ep) 235 { 236 return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr); 237 } 238 239 static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) 240 { 241 radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); 242 nm_i->nat_cnt[TOTAL_NAT]--; 243 nm_i->nat_cnt[RECLAIMABLE_NAT]--; 244 __free_nat_entry(e); 245 } 246 247 static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i, 248 struct nat_entry *ne) 249 { 250 nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); 251 struct nat_entry_set *head; 252 253 head = radix_tree_lookup(&nm_i->nat_set_root, set); 254 if (!head) { 255 head = f2fs_kmem_cache_alloc(nat_entry_set_slab, 256 GFP_NOFS, true, NULL); 257 258 INIT_LIST_HEAD(&head->entry_list); 259 INIT_LIST_HEAD(&head->set_list); 260 head->set = set; 261 head->entry_cnt = 0; 262 f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); 263 } 264 return head; 265 } 266 267 static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, 268 struct nat_entry *ne, bool init_dirty) 269 { 270 struct nat_entry_set *head; 271 bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR; 272 273 if (!new_ne) 274 head = __grab_nat_entry_set(nm_i, ne); 275 276 /* 277 * update entry_cnt in below condition: 278 * 1. update NEW_ADDR to valid block address; 279 * 2. update old block address to new one; 280 */ 281 if (!new_ne && (get_nat_flag(ne, IS_PREALLOC) || 282 !get_nat_flag(ne, IS_DIRTY))) 283 head->entry_cnt++; 284 285 set_nat_flag(ne, IS_PREALLOC, new_ne); 286 287 if (get_nat_flag(ne, IS_DIRTY)) 288 goto refresh_list; 289 290 nm_i->nat_cnt[DIRTY_NAT]++; 291 if (!init_dirty) 292 nm_i->nat_cnt[RECLAIMABLE_NAT]--; 293 set_nat_flag(ne, IS_DIRTY, true); 294 refresh_list: 295 spin_lock(&nm_i->nat_list_lock); 296 if (new_ne) 297 list_del_init(&ne->list); 298 else 299 list_move_tail(&ne->list, &head->entry_list); 300 spin_unlock(&nm_i->nat_list_lock); 301 } 302 303 static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, 304 struct nat_entry_set *set, struct nat_entry *ne) 305 { 306 spin_lock(&nm_i->nat_list_lock); 307 list_move_tail(&ne->list, &nm_i->nat_entries); 308 spin_unlock(&nm_i->nat_list_lock); 309 310 set_nat_flag(ne, IS_DIRTY, false); 311 set->entry_cnt--; 312 nm_i->nat_cnt[DIRTY_NAT]--; 313 nm_i->nat_cnt[RECLAIMABLE_NAT]++; 314 } 315 316 static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, 317 nid_t start, unsigned int nr, struct nat_entry_set **ep) 318 { 319 return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep, 320 start, nr); 321 } 322 323 bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio) 324 { 325 return is_node_folio(folio) && IS_DNODE(folio) && is_cold_node(folio); 326 } 327 328 void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi) 329 { 330 spin_lock_init(&sbi->fsync_node_lock); 331 INIT_LIST_HEAD(&sbi->fsync_node_list); 332 sbi->fsync_seg_id = 0; 333 sbi->fsync_node_num = 0; 334 } 335 336 static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi, 337 struct folio *folio) 338 { 339 struct fsync_node_entry *fn; 340 unsigned long flags; 341 unsigned int seq_id; 342 343 fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, 344 
GFP_NOFS, true, NULL); 345 346 folio_get(folio); 347 fn->folio = folio; 348 INIT_LIST_HEAD(&fn->list); 349 350 spin_lock_irqsave(&sbi->fsync_node_lock, flags); 351 list_add_tail(&fn->list, &sbi->fsync_node_list); 352 fn->seq_id = sbi->fsync_seg_id++; 353 seq_id = fn->seq_id; 354 sbi->fsync_node_num++; 355 spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); 356 357 return seq_id; 358 } 359 360 void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct folio *folio) 361 { 362 struct fsync_node_entry *fn; 363 unsigned long flags; 364 365 spin_lock_irqsave(&sbi->fsync_node_lock, flags); 366 list_for_each_entry(fn, &sbi->fsync_node_list, list) { 367 if (fn->folio == folio) { 368 list_del(&fn->list); 369 sbi->fsync_node_num--; 370 spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); 371 kmem_cache_free(fsync_node_entry_slab, fn); 372 folio_put(folio); 373 return; 374 } 375 } 376 spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); 377 f2fs_bug_on(sbi, 1); 378 } 379 380 void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi) 381 { 382 unsigned long flags; 383 384 spin_lock_irqsave(&sbi->fsync_node_lock, flags); 385 sbi->fsync_seg_id = 0; 386 spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); 387 } 388 389 int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) 390 { 391 struct f2fs_nm_info *nm_i = NM_I(sbi); 392 struct nat_entry *e; 393 bool need = false; 394 395 f2fs_down_read(&nm_i->nat_tree_lock); 396 e = __lookup_nat_cache(nm_i, nid, false); 397 if (e) { 398 if (!get_nat_flag(e, IS_CHECKPOINTED) && 399 !get_nat_flag(e, HAS_FSYNCED_INODE)) 400 need = true; 401 } 402 f2fs_up_read(&nm_i->nat_tree_lock); 403 return need; 404 } 405 406 bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) 407 { 408 struct f2fs_nm_info *nm_i = NM_I(sbi); 409 struct nat_entry *e; 410 bool is_cp = true; 411 412 f2fs_down_read(&nm_i->nat_tree_lock); 413 e = __lookup_nat_cache(nm_i, nid, false); 414 if (e && !get_nat_flag(e, IS_CHECKPOINTED)) 415 is_cp = false; 416 f2fs_up_read(&nm_i->nat_tree_lock); 417 return is_cp; 418 } 419 420 bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) 421 { 422 struct f2fs_nm_info *nm_i = NM_I(sbi); 423 struct nat_entry *e; 424 bool need_update = true; 425 426 f2fs_down_read(&nm_i->nat_tree_lock); 427 e = __lookup_nat_cache(nm_i, ino, false); 428 if (e && get_nat_flag(e, HAS_LAST_FSYNC) && 429 (get_nat_flag(e, IS_CHECKPOINTED) || 430 get_nat_flag(e, HAS_FSYNCED_INODE))) 431 need_update = false; 432 f2fs_up_read(&nm_i->nat_tree_lock); 433 return need_update; 434 } 435 436 /* must be locked by nat_tree_lock */ 437 static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, 438 struct f2fs_nat_entry *ne) 439 { 440 struct f2fs_nm_info *nm_i = NM_I(sbi); 441 struct nat_entry *new, *e; 442 443 /* Let's mitigate lock contention of nat_tree_lock during checkpoint */ 444 if (f2fs_rwsem_is_locked(&sbi->cp_global_sem)) 445 return; 446 447 new = __alloc_nat_entry(sbi, nid, false); 448 if (!new) 449 return; 450 451 f2fs_down_write(&nm_i->nat_tree_lock); 452 e = __lookup_nat_cache(nm_i, nid, false); 453 if (!e) 454 e = __init_nat_entry(nm_i, new, ne, false, false); 455 else 456 f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) || 457 nat_get_blkaddr(e) != 458 le32_to_cpu(ne->block_addr) || 459 nat_get_version(e) != ne->version); 460 f2fs_up_write(&nm_i->nat_tree_lock); 461 if (e != new) 462 __free_nat_entry(new); 463 } 464 465 static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, 466 block_t new_blkaddr, bool 
fsync_done) 467 { 468 struct f2fs_nm_info *nm_i = NM_I(sbi); 469 struct nat_entry *e; 470 struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true); 471 bool init_dirty = false; 472 473 f2fs_down_write(&nm_i->nat_tree_lock); 474 e = __lookup_nat_cache(nm_i, ni->nid, true); 475 if (!e) { 476 init_dirty = true; 477 e = __init_nat_entry(nm_i, new, NULL, true, true); 478 copy_node_info(&e->ni, ni); 479 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); 480 } else if (new_blkaddr == NEW_ADDR) { 481 /* 482 * when nid is reallocated, 483 * previous nat entry can be remained in nat cache. 484 * So, reinitialize it with new information. 485 */ 486 copy_node_info(&e->ni, ni); 487 f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); 488 } 489 /* let's free early to reduce memory consumption */ 490 if (e != new) 491 __free_nat_entry(new); 492 493 /* sanity check */ 494 f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr); 495 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR && 496 new_blkaddr == NULL_ADDR); 497 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && 498 new_blkaddr == NEW_ADDR); 499 f2fs_bug_on(sbi, __is_valid_data_blkaddr(nat_get_blkaddr(e)) && 500 new_blkaddr == NEW_ADDR); 501 502 /* increment version no as node is removed */ 503 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { 504 unsigned char version = nat_get_version(e); 505 506 nat_set_version(e, inc_node_version(version)); 507 } 508 509 /* change address */ 510 nat_set_blkaddr(e, new_blkaddr); 511 if (!__is_valid_data_blkaddr(new_blkaddr)) 512 set_nat_flag(e, IS_CHECKPOINTED, false); 513 __set_nat_cache_dirty(nm_i, e, init_dirty); 514 515 /* update fsync_mark if its inode nat entry is still alive */ 516 if (ni->nid != ni->ino) 517 e = __lookup_nat_cache(nm_i, ni->ino, false); 518 if (e) { 519 if (fsync_done && ni->nid == ni->ino) 520 set_nat_flag(e, HAS_FSYNCED_INODE, true); 521 set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); 522 } 523 f2fs_up_write(&nm_i->nat_tree_lock); 524 } 525 526 int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) 527 { 528 struct f2fs_nm_info *nm_i = NM_I(sbi); 529 int nr = nr_shrink; 530 531 if (!f2fs_down_write_trylock(&nm_i->nat_tree_lock)) 532 return 0; 533 534 spin_lock(&nm_i->nat_list_lock); 535 while (nr_shrink) { 536 struct nat_entry *ne; 537 538 if (list_empty(&nm_i->nat_entries)) 539 break; 540 541 ne = list_first_entry(&nm_i->nat_entries, 542 struct nat_entry, list); 543 list_del(&ne->list); 544 spin_unlock(&nm_i->nat_list_lock); 545 546 __del_from_nat_cache(nm_i, ne); 547 nr_shrink--; 548 549 spin_lock(&nm_i->nat_list_lock); 550 } 551 spin_unlock(&nm_i->nat_list_lock); 552 553 f2fs_up_write(&nm_i->nat_tree_lock); 554 return nr - nr_shrink; 555 } 556 557 int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, 558 struct node_info *ni, bool checkpoint_context) 559 { 560 struct f2fs_nm_info *nm_i = NM_I(sbi); 561 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 562 struct f2fs_journal *journal = curseg->journal; 563 nid_t start_nid = START_NID(nid); 564 struct f2fs_nat_block *nat_blk; 565 struct folio *folio = NULL; 566 struct f2fs_nat_entry ne; 567 struct nat_entry *e; 568 pgoff_t index; 569 int i; 570 bool need_cache = true; 571 572 ni->flag = 0; 573 ni->nid = nid; 574 retry: 575 /* Check nat cache */ 576 f2fs_down_read(&nm_i->nat_tree_lock); 577 e = __lookup_nat_cache(nm_i, nid, false); 578 if (e) { 579 ni->ino = nat_get_ino(e); 580 ni->blk_addr = nat_get_blkaddr(e); 581 ni->version = nat_get_version(e); 582 f2fs_up_read(&nm_i->nat_tree_lock); 583 if 
(IS_ENABLED(CONFIG_F2FS_CHECK_FS)) { 584 need_cache = false; 585 goto sanity_check; 586 } 587 return 0; 588 } 589 590 /* 591 * Check current segment summary by trying to grab journal_rwsem first. 592 * This sem is on the critical path on the checkpoint requiring the above 593 * nat_tree_lock. Therefore, we should retry, if we failed to grab here 594 * while not bothering checkpoint. 595 */ 596 if (!f2fs_rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) { 597 down_read(&curseg->journal_rwsem); 598 } else if (f2fs_rwsem_is_contended(&nm_i->nat_tree_lock) || 599 !down_read_trylock(&curseg->journal_rwsem)) { 600 f2fs_up_read(&nm_i->nat_tree_lock); 601 goto retry; 602 } 603 604 i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0); 605 if (i >= 0) { 606 ne = nat_in_journal(journal, i); 607 node_info_from_raw_nat(ni, &ne); 608 } 609 up_read(&curseg->journal_rwsem); 610 if (i >= 0) { 611 f2fs_up_read(&nm_i->nat_tree_lock); 612 goto sanity_check; 613 } 614 615 /* Fill node_info from nat page */ 616 index = current_nat_addr(sbi, nid); 617 f2fs_up_read(&nm_i->nat_tree_lock); 618 619 folio = f2fs_get_meta_folio(sbi, index); 620 if (IS_ERR(folio)) 621 return PTR_ERR(folio); 622 623 nat_blk = folio_address(folio); 624 ne = nat_blk->entries[nid - start_nid]; 625 node_info_from_raw_nat(ni, &ne); 626 f2fs_folio_put(folio, true); 627 sanity_check: 628 if (__is_valid_data_blkaddr(ni->blk_addr) && 629 !f2fs_is_valid_blkaddr(sbi, ni->blk_addr, 630 DATA_GENERIC_ENHANCE)) { 631 set_sbi_flag(sbi, SBI_NEED_FSCK); 632 f2fs_err_ratelimited(sbi, 633 "f2fs_get_node_info of %pS: inconsistent nat entry, " 634 "ino:%u, nid:%u, blkaddr:%u, ver:%u, flag:%u", 635 __builtin_return_address(0), 636 ni->ino, ni->nid, ni->blk_addr, ni->version, ni->flag); 637 f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT); 638 return -EFSCORRUPTED; 639 } 640 641 /* cache nat entry */ 642 if (need_cache) 643 cache_nat_entry(sbi, nid, &ne); 644 return 0; 645 } 646 647 /* 648 * readahead MAX_RA_NODE number of node pages. 649 */ 650 static void f2fs_ra_node_pages(struct folio *parent, int start, int n) 651 { 652 struct f2fs_sb_info *sbi = F2FS_F_SB(parent); 653 struct blk_plug plug; 654 int i, end; 655 nid_t nid; 656 657 blk_start_plug(&plug); 658 659 /* Then, try readahead for siblings of the desired node */ 660 end = start + n; 661 end = min(end, (int)NIDS_PER_BLOCK); 662 for (i = start; i < end; i++) { 663 nid = get_nid(parent, i, false); 664 f2fs_ra_node_page(sbi, nid); 665 } 666 667 blk_finish_plug(&plug); 668 } 669 670 pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs) 671 { 672 const long direct_index = ADDRS_PER_INODE(dn->inode); 673 const long direct_blks = ADDRS_PER_BLOCK(dn->inode); 674 const long indirect_blks = ADDRS_PER_BLOCK(dn->inode) * NIDS_PER_BLOCK; 675 unsigned int skipped_unit = ADDRS_PER_BLOCK(dn->inode); 676 int cur_level = dn->cur_level; 677 int max_level = dn->max_level; 678 pgoff_t base = 0; 679 680 if (!dn->max_level) 681 return pgofs + 1; 682 683 while (max_level-- > cur_level) 684 skipped_unit *= NIDS_PER_BLOCK; 685 686 switch (dn->max_level) { 687 case 3: 688 base += 2 * indirect_blks; 689 fallthrough; 690 case 2: 691 base += 2 * direct_blks; 692 fallthrough; 693 case 1: 694 base += direct_index; 695 break; 696 default: 697 f2fs_bug_on(F2FS_I_SB(dn->inode), 1); 698 } 699 700 return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base; 701 } 702 703 /* 704 * The maximum depth is four. 705 * Offset[0] will have raw inode offset. 
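* Depth 0 addresses block pointers held in the inode itself, depth 1 goes through a direct node (NODE_DIR1/2_BLOCK), depth 2 through a single indirect node (NODE_IND1/2_BLOCK), and depth 3 through the double indirect node (NODE_DIND_BLOCK).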
706 */ 707 static int get_node_path(struct inode *inode, long block, 708 int offset[4], unsigned int noffset[4]) 709 { 710 const long direct_index = ADDRS_PER_INODE(inode); 711 const long direct_blks = ADDRS_PER_BLOCK(inode); 712 const long dptrs_per_blk = NIDS_PER_BLOCK; 713 const long indirect_blks = ADDRS_PER_BLOCK(inode) * NIDS_PER_BLOCK; 714 const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK; 715 int n = 0; 716 int level = 0; 717 718 noffset[0] = 0; 719 720 if (block < direct_index) { 721 offset[n] = block; 722 goto got; 723 } 724 block -= direct_index; 725 if (block < direct_blks) { 726 offset[n++] = NODE_DIR1_BLOCK; 727 noffset[n] = 1; 728 offset[n] = block; 729 level = 1; 730 goto got; 731 } 732 block -= direct_blks; 733 if (block < direct_blks) { 734 offset[n++] = NODE_DIR2_BLOCK; 735 noffset[n] = 2; 736 offset[n] = block; 737 level = 1; 738 goto got; 739 } 740 block -= direct_blks; 741 if (block < indirect_blks) { 742 offset[n++] = NODE_IND1_BLOCK; 743 noffset[n] = 3; 744 offset[n++] = block / direct_blks; 745 noffset[n] = 4 + offset[n - 1]; 746 offset[n] = block % direct_blks; 747 level = 2; 748 goto got; 749 } 750 block -= indirect_blks; 751 if (block < indirect_blks) { 752 offset[n++] = NODE_IND2_BLOCK; 753 noffset[n] = 4 + dptrs_per_blk; 754 offset[n++] = block / direct_blks; 755 noffset[n] = 5 + dptrs_per_blk + offset[n - 1]; 756 offset[n] = block % direct_blks; 757 level = 2; 758 goto got; 759 } 760 block -= indirect_blks; 761 if (block < dindirect_blks) { 762 offset[n++] = NODE_DIND_BLOCK; 763 noffset[n] = 5 + (dptrs_per_blk * 2); 764 offset[n++] = block / indirect_blks; 765 noffset[n] = 6 + (dptrs_per_blk * 2) + 766 offset[n - 1] * (dptrs_per_blk + 1); 767 offset[n++] = (block / direct_blks) % dptrs_per_blk; 768 noffset[n] = 7 + (dptrs_per_blk * 2) + 769 offset[n - 2] * (dptrs_per_blk + 1) + 770 offset[n - 1]; 771 offset[n] = block % direct_blks; 772 level = 3; 773 goto got; 774 } else { 775 return -E2BIG; 776 } 777 got: 778 return level; 779 } 780 781 static struct folio *f2fs_get_node_folio_ra(struct folio *parent, int start); 782 783 /* 784 * Caller should call f2fs_put_dnode(dn). 785 * Also, it should grab and release a rwsem by calling f2fs_lock_op() and 786 * f2fs_unlock_op() only if mode is set with ALLOC_NODE. 
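* (f2fs_lock_op() matters only in ALLOC_NODE mode, the only mode that may allocate new node blocks; LOOKUP_NODE(_RA) does not modify the node tree.)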
787 */ 788 int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) 789 { 790 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 791 struct folio *nfolio[4]; 792 struct folio *parent = NULL; 793 int offset[4]; 794 unsigned int noffset[4]; 795 nid_t nids[4]; 796 int level, i = 0; 797 int err = 0; 798 799 level = get_node_path(dn->inode, index, offset, noffset); 800 if (level < 0) 801 return level; 802 803 nids[0] = dn->inode->i_ino; 804 805 if (!dn->inode_folio) { 806 nfolio[0] = f2fs_get_inode_folio(sbi, nids[0]); 807 if (IS_ERR(nfolio[0])) 808 return PTR_ERR(nfolio[0]); 809 } else { 810 nfolio[0] = dn->inode_folio; 811 } 812 813 /* if inline_data is set, should not report any block indices */ 814 if (f2fs_has_inline_data(dn->inode) && index) { 815 err = -ENOENT; 816 f2fs_folio_put(nfolio[0], true); 817 goto release_out; 818 } 819 820 parent = nfolio[0]; 821 if (level != 0) 822 nids[1] = get_nid(parent, offset[0], true); 823 dn->inode_folio = nfolio[0]; 824 dn->inode_folio_locked = true; 825 826 /* get indirect or direct nodes */ 827 for (i = 1; i <= level; i++) { 828 bool done = false; 829 830 if (nids[i] && nids[i] == dn->inode->i_ino) { 831 err = -EFSCORRUPTED; 832 f2fs_err_ratelimited(sbi, 833 "inode mapping table is corrupted, run fsck to fix it, " 834 "ino:%lu, nid:%u, level:%d, offset:%d", 835 dn->inode->i_ino, nids[i], level, offset[level]); 836 set_sbi_flag(sbi, SBI_NEED_FSCK); 837 goto release_pages; 838 } 839 840 if (!nids[i] && mode == ALLOC_NODE) { 841 /* alloc new node */ 842 if (!f2fs_alloc_nid(sbi, &(nids[i]))) { 843 err = -ENOSPC; 844 goto release_pages; 845 } 846 847 dn->nid = nids[i]; 848 nfolio[i] = f2fs_new_node_folio(dn, noffset[i]); 849 if (IS_ERR(nfolio[i])) { 850 f2fs_alloc_nid_failed(sbi, nids[i]); 851 err = PTR_ERR(nfolio[i]); 852 goto release_pages; 853 } 854 855 set_nid(parent, offset[i - 1], nids[i], i == 1); 856 f2fs_alloc_nid_done(sbi, nids[i]); 857 done = true; 858 } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { 859 nfolio[i] = f2fs_get_node_folio_ra(parent, offset[i - 1]); 860 if (IS_ERR(nfolio[i])) { 861 err = PTR_ERR(nfolio[i]); 862 goto release_pages; 863 } 864 done = true; 865 } 866 if (i == 1) { 867 dn->inode_folio_locked = false; 868 folio_unlock(parent); 869 } else { 870 f2fs_folio_put(parent, true); 871 } 872 873 if (!done) { 874 nfolio[i] = f2fs_get_node_folio(sbi, nids[i]); 875 if (IS_ERR(nfolio[i])) { 876 err = PTR_ERR(nfolio[i]); 877 f2fs_folio_put(nfolio[0], false); 878 goto release_out; 879 } 880 } 881 if (i < level) { 882 parent = nfolio[i]; 883 nids[i + 1] = get_nid(parent, offset[i], false); 884 } 885 } 886 dn->nid = nids[level]; 887 dn->ofs_in_node = offset[level]; 888 dn->node_folio = nfolio[level]; 889 dn->data_blkaddr = f2fs_data_blkaddr(dn); 890 891 if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) && 892 f2fs_sb_has_readonly(sbi)) { 893 unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size; 894 unsigned int ofs_in_node = dn->ofs_in_node; 895 pgoff_t fofs = index; 896 unsigned int c_len; 897 block_t blkaddr; 898 899 /* should align fofs and ofs_in_node to cluster_size */ 900 if (fofs % cluster_size) { 901 fofs = round_down(fofs, cluster_size); 902 ofs_in_node = round_down(ofs_in_node, cluster_size); 903 } 904 905 c_len = f2fs_cluster_blocks_are_contiguous(dn, ofs_in_node); 906 if (!c_len) 907 goto out; 908 909 blkaddr = data_blkaddr(dn->inode, dn->node_folio, ofs_in_node); 910 if (blkaddr == COMPRESS_ADDR) 911 blkaddr = data_blkaddr(dn->inode, dn->node_folio, 912 ofs_in_node + 1); 913 914 
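/* record the cluster's mapping (start block and compressed length) in the read extent cache */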
f2fs_update_read_extent_tree_range_compressed(dn->inode, 915 fofs, blkaddr, cluster_size, c_len); 916 } 917 out: 918 return 0; 919 920 release_pages: 921 f2fs_folio_put(parent, true); 922 if (i > 1) 923 f2fs_folio_put(nfolio[0], false); 924 release_out: 925 dn->inode_folio = NULL; 926 dn->node_folio = NULL; 927 if (err == -ENOENT) { 928 dn->cur_level = i; 929 dn->max_level = level; 930 dn->ofs_in_node = offset[level]; 931 } 932 return err; 933 } 934 935 static int truncate_node(struct dnode_of_data *dn) 936 { 937 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 938 struct node_info ni; 939 int err; 940 pgoff_t index; 941 942 err = f2fs_get_node_info(sbi, dn->nid, &ni, false); 943 if (err) 944 return err; 945 946 if (ni.blk_addr != NEW_ADDR && 947 !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC_ENHANCE)) { 948 f2fs_err_ratelimited(sbi, 949 "nat entry is corrupted, run fsck to fix it, ino:%u, " 950 "nid:%u, blkaddr:%u", ni.ino, ni.nid, ni.blk_addr); 951 set_sbi_flag(sbi, SBI_NEED_FSCK); 952 f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT); 953 return -EFSCORRUPTED; 954 } 955 956 /* Deallocate node address */ 957 f2fs_invalidate_blocks(sbi, ni.blk_addr, 1); 958 dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino); 959 set_node_addr(sbi, &ni, NULL_ADDR, false); 960 961 if (dn->nid == dn->inode->i_ino) { 962 f2fs_remove_orphan_inode(sbi, dn->nid); 963 dec_valid_inode_count(sbi); 964 f2fs_inode_synced(dn->inode); 965 } 966 967 clear_node_folio_dirty(dn->node_folio); 968 set_sbi_flag(sbi, SBI_IS_DIRTY); 969 970 index = dn->node_folio->index; 971 f2fs_folio_put(dn->node_folio, true); 972 973 invalidate_mapping_pages(NODE_MAPPING(sbi), 974 index, index); 975 976 dn->node_folio = NULL; 977 trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); 978 979 return 0; 980 } 981 982 static int truncate_dnode(struct dnode_of_data *dn) 983 { 984 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 985 struct folio *folio; 986 int err; 987 988 if (dn->nid == 0) 989 return 1; 990 991 /* get direct node */ 992 folio = f2fs_get_node_folio(sbi, dn->nid); 993 if (PTR_ERR(folio) == -ENOENT) 994 return 1; 995 else if (IS_ERR(folio)) 996 return PTR_ERR(folio); 997 998 if (IS_INODE(folio) || ino_of_node(folio) != dn->inode->i_ino) { 999 f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u", 1000 dn->inode->i_ino, dn->nid, ino_of_node(folio)); 1001 set_sbi_flag(sbi, SBI_NEED_FSCK); 1002 f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE); 1003 f2fs_folio_put(folio, true); 1004 return -EFSCORRUPTED; 1005 } 1006 1007 /* Make dnode_of_data for parameter */ 1008 dn->node_folio = folio; 1009 dn->ofs_in_node = 0; 1010 f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode)); 1011 err = truncate_node(dn); 1012 if (err) { 1013 f2fs_folio_put(folio, true); 1014 return err; 1015 } 1016 1017 return 1; 1018 } 1019 1020 static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, 1021 int ofs, int depth) 1022 { 1023 struct dnode_of_data rdn = *dn; 1024 struct folio *folio; 1025 struct f2fs_node *rn; 1026 nid_t child_nid; 1027 unsigned int child_nofs; 1028 int freed = 0; 1029 int i, ret; 1030 1031 if (dn->nid == 0) 1032 return NIDS_PER_BLOCK + 1; 1033 1034 trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); 1035 1036 folio = f2fs_get_node_folio(F2FS_I_SB(dn->inode), dn->nid); 1037 if (IS_ERR(folio)) { 1038 trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(folio)); 1039 return PTR_ERR(folio); 1040 } 1041 1042 f2fs_ra_node_pages(folio, ofs, NIDS_PER_BLOCK); 
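/* with readahead issued for the children above, walk the child nids: at depth < 3 they are direct nodes, otherwise recurse one more level down */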
1043 1044 rn = F2FS_NODE(folio); 1045 if (depth < 3) { 1046 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { 1047 child_nid = le32_to_cpu(rn->in.nid[i]); 1048 if (child_nid == 0) 1049 continue; 1050 rdn.nid = child_nid; 1051 ret = truncate_dnode(&rdn); 1052 if (ret < 0) 1053 goto out_err; 1054 if (set_nid(folio, i, 0, false)) 1055 dn->node_changed = true; 1056 } 1057 } else { 1058 child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1; 1059 for (i = ofs; i < NIDS_PER_BLOCK; i++) { 1060 child_nid = le32_to_cpu(rn->in.nid[i]); 1061 if (child_nid == 0) { 1062 child_nofs += NIDS_PER_BLOCK + 1; 1063 continue; 1064 } 1065 rdn.nid = child_nid; 1066 ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); 1067 if (ret == (NIDS_PER_BLOCK + 1)) { 1068 if (set_nid(folio, i, 0, false)) 1069 dn->node_changed = true; 1070 child_nofs += ret; 1071 } else if (ret < 0 && ret != -ENOENT) { 1072 goto out_err; 1073 } 1074 } 1075 freed = child_nofs; 1076 } 1077 1078 if (!ofs) { 1079 /* remove current indirect node */ 1080 dn->node_folio = folio; 1081 ret = truncate_node(dn); 1082 if (ret) 1083 goto out_err; 1084 freed++; 1085 } else { 1086 f2fs_folio_put(folio, true); 1087 } 1088 trace_f2fs_truncate_nodes_exit(dn->inode, freed); 1089 return freed; 1090 1091 out_err: 1092 f2fs_folio_put(folio, true); 1093 trace_f2fs_truncate_nodes_exit(dn->inode, ret); 1094 return ret; 1095 } 1096 1097 static int truncate_partial_nodes(struct dnode_of_data *dn, 1098 struct f2fs_inode *ri, int *offset, int depth) 1099 { 1100 struct folio *folios[2]; 1101 nid_t nid[3]; 1102 nid_t child_nid; 1103 int err = 0; 1104 int i; 1105 int idx = depth - 2; 1106 1107 nid[0] = get_nid(dn->inode_folio, offset[0], true); 1108 if (!nid[0]) 1109 return 0; 1110 1111 /* get indirect nodes in the path */ 1112 for (i = 0; i < idx + 1; i++) { 1113 /* reference count'll be increased */ 1114 folios[i] = f2fs_get_node_folio(F2FS_I_SB(dn->inode), nid[i]); 1115 if (IS_ERR(folios[i])) { 1116 err = PTR_ERR(folios[i]); 1117 idx = i - 1; 1118 goto fail; 1119 } 1120 nid[i + 1] = get_nid(folios[i], offset[i + 1], false); 1121 } 1122 1123 f2fs_ra_node_pages(folios[idx], offset[idx + 1], NIDS_PER_BLOCK); 1124 1125 /* free direct nodes linked to a partial indirect node */ 1126 for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { 1127 child_nid = get_nid(folios[idx], i, false); 1128 if (!child_nid) 1129 continue; 1130 dn->nid = child_nid; 1131 err = truncate_dnode(dn); 1132 if (err < 0) 1133 goto fail; 1134 if (set_nid(folios[idx], i, 0, false)) 1135 dn->node_changed = true; 1136 } 1137 1138 if (offset[idx + 1] == 0) { 1139 dn->node_folio = folios[idx]; 1140 dn->nid = nid[idx]; 1141 err = truncate_node(dn); 1142 if (err) 1143 goto fail; 1144 } else { 1145 f2fs_folio_put(folios[idx], true); 1146 } 1147 offset[idx]++; 1148 offset[idx + 1] = 0; 1149 idx--; 1150 fail: 1151 for (i = idx; i >= 0; i--) 1152 f2fs_folio_put(folios[i], true); 1153 1154 trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); 1155 1156 return err; 1157 } 1158 1159 /* 1160 * All the block addresses of data and nodes should be nullified. 
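* Starting from 'from', partially covered indirect nodes are trimmed first (truncate_partial_nodes) and then whole sub-trees are freed level by level.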
1161 */ 1162 int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) 1163 { 1164 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1165 int err = 0, cont = 1; 1166 int level, offset[4], noffset[4]; 1167 unsigned int nofs = 0; 1168 struct f2fs_inode *ri; 1169 struct dnode_of_data dn; 1170 struct folio *folio; 1171 1172 trace_f2fs_truncate_inode_blocks_enter(inode, from); 1173 1174 level = get_node_path(inode, from, offset, noffset); 1175 if (level <= 0) { 1176 if (!level) { 1177 level = -EFSCORRUPTED; 1178 f2fs_err(sbi, "%s: inode ino=%lx has corrupted node block, from:%lu addrs:%u", 1179 __func__, inode->i_ino, 1180 from, ADDRS_PER_INODE(inode)); 1181 set_sbi_flag(sbi, SBI_NEED_FSCK); 1182 } 1183 trace_f2fs_truncate_inode_blocks_exit(inode, level); 1184 return level; 1185 } 1186 1187 folio = f2fs_get_inode_folio(sbi, inode->i_ino); 1188 if (IS_ERR(folio)) { 1189 trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(folio)); 1190 return PTR_ERR(folio); 1191 } 1192 1193 set_new_dnode(&dn, inode, folio, NULL, 0); 1194 folio_unlock(folio); 1195 1196 ri = F2FS_INODE(folio); 1197 switch (level) { 1198 case 0: 1199 case 1: 1200 nofs = noffset[1]; 1201 break; 1202 case 2: 1203 nofs = noffset[1]; 1204 if (!offset[level - 1]) 1205 goto skip_partial; 1206 err = truncate_partial_nodes(&dn, ri, offset, level); 1207 if (err < 0 && err != -ENOENT) 1208 goto fail; 1209 nofs += 1 + NIDS_PER_BLOCK; 1210 break; 1211 case 3: 1212 nofs = 5 + 2 * NIDS_PER_BLOCK; 1213 if (!offset[level - 1]) 1214 goto skip_partial; 1215 err = truncate_partial_nodes(&dn, ri, offset, level); 1216 if (err < 0 && err != -ENOENT) 1217 goto fail; 1218 break; 1219 default: 1220 BUG(); 1221 } 1222 1223 skip_partial: 1224 while (cont) { 1225 dn.nid = get_nid(folio, offset[0], true); 1226 switch (offset[0]) { 1227 case NODE_DIR1_BLOCK: 1228 case NODE_DIR2_BLOCK: 1229 err = truncate_dnode(&dn); 1230 break; 1231 1232 case NODE_IND1_BLOCK: 1233 case NODE_IND2_BLOCK: 1234 err = truncate_nodes(&dn, nofs, offset[1], 2); 1235 break; 1236 1237 case NODE_DIND_BLOCK: 1238 err = truncate_nodes(&dn, nofs, offset[1], 3); 1239 cont = 0; 1240 break; 1241 1242 default: 1243 BUG(); 1244 } 1245 if (err == -ENOENT) { 1246 set_sbi_flag(F2FS_F_SB(folio), SBI_NEED_FSCK); 1247 f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); 1248 f2fs_err_ratelimited(sbi, 1249 "truncate node fail, ino:%lu, nid:%u, " 1250 "offset[0]:%d, offset[1]:%d, nofs:%d", 1251 inode->i_ino, dn.nid, offset[0], 1252 offset[1], nofs); 1253 err = 0; 1254 } 1255 if (err < 0) 1256 goto fail; 1257 if (offset[1] == 0 && get_nid(folio, offset[0], true)) { 1258 folio_lock(folio); 1259 BUG_ON(!is_node_folio(folio)); 1260 set_nid(folio, offset[0], 0, true); 1261 folio_unlock(folio); 1262 } 1263 offset[1] = 0; 1264 offset[0]++; 1265 nofs += err; 1266 } 1267 fail: 1268 f2fs_folio_put(folio, false); 1269 trace_f2fs_truncate_inode_blocks_exit(inode, err); 1270 return err > 0 ? 
0 : err; 1271 } 1272 1273 /* caller must lock inode page */ 1274 int f2fs_truncate_xattr_node(struct inode *inode) 1275 { 1276 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1277 nid_t nid = F2FS_I(inode)->i_xattr_nid; 1278 struct dnode_of_data dn; 1279 struct folio *nfolio; 1280 int err; 1281 1282 if (!nid) 1283 return 0; 1284 1285 nfolio = f2fs_get_xnode_folio(sbi, nid); 1286 if (IS_ERR(nfolio)) 1287 return PTR_ERR(nfolio); 1288 1289 set_new_dnode(&dn, inode, NULL, nfolio, nid); 1290 err = truncate_node(&dn); 1291 if (err) { 1292 f2fs_folio_put(nfolio, true); 1293 return err; 1294 } 1295 1296 f2fs_i_xnid_write(inode, 0); 1297 1298 return 0; 1299 } 1300 1301 /* 1302 * Caller should grab and release a rwsem by calling f2fs_lock_op() and 1303 * f2fs_unlock_op(). 1304 */ 1305 int f2fs_remove_inode_page(struct inode *inode) 1306 { 1307 struct dnode_of_data dn; 1308 int err; 1309 1310 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); 1311 err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE); 1312 if (err) 1313 return err; 1314 1315 err = f2fs_truncate_xattr_node(inode); 1316 if (err) { 1317 f2fs_put_dnode(&dn); 1318 return err; 1319 } 1320 1321 /* remove potential inline_data blocks */ 1322 if (!IS_DEVICE_ALIASING(inode) && 1323 (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 1324 S_ISLNK(inode->i_mode))) 1325 f2fs_truncate_data_blocks_range(&dn, 1); 1326 1327 /* 0 is possible, after f2fs_new_inode() has failed */ 1328 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { 1329 f2fs_put_dnode(&dn); 1330 return -EIO; 1331 } 1332 1333 if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) { 1334 f2fs_warn(F2FS_I_SB(inode), 1335 "f2fs_remove_inode_page: inconsistent i_blocks, ino:%lu, iblocks:%llu", 1336 inode->i_ino, (unsigned long long)inode->i_blocks); 1337 set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); 1338 } 1339 1340 /* will put inode & node pages */ 1341 err = truncate_node(&dn); 1342 if (err) { 1343 f2fs_put_dnode(&dn); 1344 return err; 1345 } 1346 return 0; 1347 } 1348 1349 struct folio *f2fs_new_inode_folio(struct inode *inode) 1350 { 1351 struct dnode_of_data dn; 1352 1353 /* allocate inode page for new inode */ 1354 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); 1355 1356 /* caller should f2fs_folio_put(folio, true); */ 1357 return f2fs_new_node_folio(&dn, 0); 1358 } 1359 1360 struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs) 1361 { 1362 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 1363 struct node_info new_ni; 1364 struct folio *folio; 1365 int err; 1366 1367 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) 1368 return ERR_PTR(-EPERM); 1369 1370 folio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), dn->nid, false); 1371 if (IS_ERR(folio)) 1372 return folio; 1373 1374 if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs)))) 1375 goto fail; 1376 1377 #ifdef CONFIG_F2FS_CHECK_FS 1378 err = f2fs_get_node_info(sbi, dn->nid, &new_ni, false); 1379 if (err) { 1380 dec_valid_node_count(sbi, dn->inode, !ofs); 1381 goto fail; 1382 } 1383 if (unlikely(new_ni.blk_addr != NULL_ADDR)) { 1384 err = -EFSCORRUPTED; 1385 dec_valid_node_count(sbi, dn->inode, !ofs); 1386 set_sbi_flag(sbi, SBI_NEED_FSCK); 1387 f2fs_warn_ratelimited(sbi, 1388 "f2fs_new_node_folio: inconsistent nat entry, " 1389 "ino:%u, nid:%u, blkaddr:%u, ver:%u, flag:%u", 1390 new_ni.ino, new_ni.nid, new_ni.blk_addr, 1391 new_ni.version, new_ni.flag); 1392 f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT); 1393 goto fail; 1394 } 1395 #endif 1396 new_ni.nid = dn->nid; 1397 new_ni.ino = 
dn->inode->i_ino; 1398 new_ni.blk_addr = NULL_ADDR; 1399 new_ni.flag = 0; 1400 new_ni.version = 0; 1401 set_node_addr(sbi, &new_ni, NEW_ADDR, false); 1402 1403 f2fs_folio_wait_writeback(folio, NODE, true, true); 1404 fill_node_footer(folio, dn->nid, dn->inode->i_ino, ofs, true); 1405 set_cold_node(folio, S_ISDIR(dn->inode->i_mode)); 1406 if (!folio_test_uptodate(folio)) 1407 folio_mark_uptodate(folio); 1408 if (folio_mark_dirty(folio)) 1409 dn->node_changed = true; 1410 1411 if (f2fs_has_xattr_block(ofs)) 1412 f2fs_i_xnid_write(dn->inode, dn->nid); 1413 1414 if (ofs == 0) 1415 inc_valid_inode_count(sbi); 1416 return folio; 1417 fail: 1418 clear_node_folio_dirty(folio); 1419 f2fs_folio_put(folio, true); 1420 return ERR_PTR(err); 1421 } 1422 1423 /* 1424 * Caller should do after getting the following values. 1425 * 0: f2fs_folio_put(folio, false) 1426 * LOCKED_PAGE or error: f2fs_folio_put(folio, true) 1427 */ 1428 static int read_node_folio(struct folio *folio, blk_opf_t op_flags) 1429 { 1430 struct f2fs_sb_info *sbi = F2FS_F_SB(folio); 1431 struct node_info ni; 1432 struct f2fs_io_info fio = { 1433 .sbi = sbi, 1434 .type = NODE, 1435 .op = REQ_OP_READ, 1436 .op_flags = op_flags, 1437 .folio = folio, 1438 .encrypted_page = NULL, 1439 }; 1440 int err; 1441 1442 if (folio_test_uptodate(folio)) { 1443 if (!f2fs_inode_chksum_verify(sbi, folio)) { 1444 folio_clear_uptodate(folio); 1445 return -EFSBADCRC; 1446 } 1447 return LOCKED_PAGE; 1448 } 1449 1450 err = f2fs_get_node_info(sbi, folio->index, &ni, false); 1451 if (err) 1452 return err; 1453 1454 /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */ 1455 if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) { 1456 folio_clear_uptodate(folio); 1457 return -ENOENT; 1458 } 1459 1460 fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr; 1461 1462 err = f2fs_submit_page_bio(&fio); 1463 1464 if (!err) 1465 f2fs_update_iostat(sbi, NULL, FS_NODE_READ_IO, F2FS_BLKSIZE); 1466 1467 return err; 1468 } 1469 1470 /* 1471 * Readahead a node page 1472 */ 1473 void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) 1474 { 1475 struct folio *afolio; 1476 int err; 1477 1478 if (!nid) 1479 return; 1480 if (f2fs_check_nid_range(sbi, nid)) 1481 return; 1482 1483 afolio = xa_load(&NODE_MAPPING(sbi)->i_pages, nid); 1484 if (afolio) 1485 return; 1486 1487 afolio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), nid, false); 1488 if (IS_ERR(afolio)) 1489 return; 1490 1491 err = read_node_folio(afolio, REQ_RAHEAD); 1492 f2fs_folio_put(afolio, err ? 
true : false); 1493 } 1494 1495 static int sanity_check_node_footer(struct f2fs_sb_info *sbi, 1496 struct folio *folio, pgoff_t nid, 1497 enum node_type ntype) 1498 { 1499 if (unlikely(nid != nid_of_node(folio) || 1500 (ntype == NODE_TYPE_INODE && !IS_INODE(folio)) || 1501 (ntype == NODE_TYPE_XATTR && 1502 !f2fs_has_xattr_block(ofs_of_node(folio))) || 1503 time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) { 1504 f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, " 1505 "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", 1506 ntype, nid, nid_of_node(folio), ino_of_node(folio), 1507 ofs_of_node(folio), cpver_of_node(folio), 1508 next_blkaddr_of_node(folio)); 1509 set_sbi_flag(sbi, SBI_NEED_FSCK); 1510 f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); 1511 return -EFSCORRUPTED; 1512 } 1513 return 0; 1514 } 1515 1516 static struct folio *__get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid, 1517 struct folio *parent, int start, enum node_type ntype) 1518 { 1519 struct folio *folio; 1520 int err; 1521 1522 if (!nid) 1523 return ERR_PTR(-ENOENT); 1524 if (f2fs_check_nid_range(sbi, nid)) 1525 return ERR_PTR(-EINVAL); 1526 repeat: 1527 folio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), nid, false); 1528 if (IS_ERR(folio)) 1529 return folio; 1530 1531 err = read_node_folio(folio, 0); 1532 if (err < 0) 1533 goto out_put_err; 1534 if (err == LOCKED_PAGE) 1535 goto page_hit; 1536 1537 if (parent) 1538 f2fs_ra_node_pages(parent, start + 1, MAX_RA_NODE); 1539 1540 folio_lock(folio); 1541 1542 if (unlikely(!is_node_folio(folio))) { 1543 f2fs_folio_put(folio, true); 1544 goto repeat; 1545 } 1546 1547 if (unlikely(!folio_test_uptodate(folio))) { 1548 err = -EIO; 1549 goto out_err; 1550 } 1551 1552 if (!f2fs_inode_chksum_verify(sbi, folio)) { 1553 err = -EFSBADCRC; 1554 goto out_err; 1555 } 1556 page_hit: 1557 err = sanity_check_node_footer(sbi, folio, nid, ntype); 1558 if (!err) 1559 return folio; 1560 out_err: 1561 folio_clear_uptodate(folio); 1562 out_put_err: 1563 /* ENOENT comes from read_node_folio which is not an error. 
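* (the block was never written: its blkaddr is NULL_ADDR or NEW_ADDR), so it must not trigger the page EIO handling below.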
*/ 1564 if (err != -ENOENT) 1565 f2fs_handle_page_eio(sbi, folio, NODE); 1566 f2fs_folio_put(folio, true); 1567 return ERR_PTR(err); 1568 } 1569 1570 struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid) 1571 { 1572 return __get_node_folio(sbi, nid, NULL, 0, NODE_TYPE_REGULAR); 1573 } 1574 1575 struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino) 1576 { 1577 return __get_node_folio(sbi, ino, NULL, 0, NODE_TYPE_INODE); 1578 } 1579 1580 struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid) 1581 { 1582 return __get_node_folio(sbi, xnid, NULL, 0, NODE_TYPE_XATTR); 1583 } 1584 1585 static struct folio *f2fs_get_node_folio_ra(struct folio *parent, int start) 1586 { 1587 struct f2fs_sb_info *sbi = F2FS_F_SB(parent); 1588 nid_t nid = get_nid(parent, start, false); 1589 1590 return __get_node_folio(sbi, nid, parent, start, NODE_TYPE_REGULAR); 1591 } 1592 1593 static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) 1594 { 1595 struct inode *inode; 1596 struct folio *folio; 1597 int ret; 1598 1599 /* should flush inline_data before evict_inode */ 1600 inode = ilookup(sbi->sb, ino); 1601 if (!inode) 1602 return; 1603 1604 folio = f2fs_filemap_get_folio(inode->i_mapping, 0, 1605 FGP_LOCK|FGP_NOWAIT, 0); 1606 if (IS_ERR(folio)) 1607 goto iput_out; 1608 1609 if (!folio_test_uptodate(folio)) 1610 goto folio_out; 1611 1612 if (!folio_test_dirty(folio)) 1613 goto folio_out; 1614 1615 if (!folio_clear_dirty_for_io(folio)) 1616 goto folio_out; 1617 1618 ret = f2fs_write_inline_data(inode, folio); 1619 inode_dec_dirty_pages(inode); 1620 f2fs_remove_dirty_inode(inode); 1621 if (ret) 1622 folio_mark_dirty(folio); 1623 folio_out: 1624 f2fs_folio_put(folio, true); 1625 iput_out: 1626 iput(inode); 1627 } 1628 1629 static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) 1630 { 1631 pgoff_t index; 1632 struct folio_batch fbatch; 1633 struct folio *last_folio = NULL; 1634 int nr_folios; 1635 1636 folio_batch_init(&fbatch); 1637 index = 0; 1638 1639 while ((nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi), &index, 1640 (pgoff_t)-1, PAGECACHE_TAG_DIRTY, 1641 &fbatch))) { 1642 int i; 1643 1644 for (i = 0; i < nr_folios; i++) { 1645 struct folio *folio = fbatch.folios[i]; 1646 1647 if (unlikely(f2fs_cp_error(sbi))) { 1648 f2fs_folio_put(last_folio, false); 1649 folio_batch_release(&fbatch); 1650 return ERR_PTR(-EIO); 1651 } 1652 1653 if (!IS_DNODE(folio) || !is_cold_node(folio)) 1654 continue; 1655 if (ino_of_node(folio) != ino) 1656 continue; 1657 1658 folio_lock(folio); 1659 1660 if (unlikely(!is_node_folio(folio))) { 1661 continue_unlock: 1662 folio_unlock(folio); 1663 continue; 1664 } 1665 if (ino_of_node(folio) != ino) 1666 goto continue_unlock; 1667 1668 if (!folio_test_dirty(folio)) { 1669 /* someone wrote it for us */ 1670 goto continue_unlock; 1671 } 1672 1673 if (last_folio) 1674 f2fs_folio_put(last_folio, false); 1675 1676 folio_get(folio); 1677 last_folio = folio; 1678 folio_unlock(folio); 1679 } 1680 folio_batch_release(&fbatch); 1681 cond_resched(); 1682 } 1683 return last_folio; 1684 } 1685 1686 static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted, 1687 struct writeback_control *wbc, bool do_balance, 1688 enum iostat_type io_type, unsigned int *seq_id) 1689 { 1690 struct f2fs_sb_info *sbi = F2FS_F_SB(folio); 1691 nid_t nid; 1692 struct node_info ni; 1693 struct f2fs_io_info fio = { 1694 .sbi = sbi, 1695 .ino = ino_of_node(folio), 1696 .type = NODE, 1697 .op = REQ_OP_WRITE, 1698 
.op_flags = wbc_to_write_flags(wbc), 1699 .folio = folio, 1700 .encrypted_page = NULL, 1701 .submitted = 0, 1702 .io_type = io_type, 1703 .io_wbc = wbc, 1704 }; 1705 unsigned int seq; 1706 1707 trace_f2fs_writepage(folio, NODE); 1708 1709 if (unlikely(f2fs_cp_error(sbi))) { 1710 /* keep node pages in remount-ro mode */ 1711 if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY) 1712 goto redirty_out; 1713 folio_clear_uptodate(folio); 1714 dec_page_count(sbi, F2FS_DIRTY_NODES); 1715 folio_unlock(folio); 1716 return true; 1717 } 1718 1719 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 1720 goto redirty_out; 1721 1722 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) && 1723 wbc->sync_mode == WB_SYNC_NONE && 1724 IS_DNODE(folio) && is_cold_node(folio)) 1725 goto redirty_out; 1726 1727 /* get old block addr of this node page */ 1728 nid = nid_of_node(folio); 1729 f2fs_bug_on(sbi, folio->index != nid); 1730 1731 if (f2fs_get_node_info(sbi, nid, &ni, !do_balance)) 1732 goto redirty_out; 1733 1734 f2fs_down_read(&sbi->node_write); 1735 1736 /* This page is already truncated */ 1737 if (unlikely(ni.blk_addr == NULL_ADDR)) { 1738 folio_clear_uptodate(folio); 1739 dec_page_count(sbi, F2FS_DIRTY_NODES); 1740 f2fs_up_read(&sbi->node_write); 1741 folio_unlock(folio); 1742 return true; 1743 } 1744 1745 if (__is_valid_data_blkaddr(ni.blk_addr) && 1746 !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, 1747 DATA_GENERIC_ENHANCE)) { 1748 f2fs_up_read(&sbi->node_write); 1749 goto redirty_out; 1750 } 1751 1752 if (atomic && !test_opt(sbi, NOBARRIER)) 1753 fio.op_flags |= REQ_PREFLUSH | REQ_FUA; 1754 1755 /* should add to global list before clearing PAGECACHE status */ 1756 if (f2fs_in_warm_node_list(sbi, folio)) { 1757 seq = f2fs_add_fsync_node_entry(sbi, folio); 1758 if (seq_id) 1759 *seq_id = seq; 1760 } 1761 1762 folio_start_writeback(folio); 1763 1764 fio.old_blkaddr = ni.blk_addr; 1765 f2fs_do_write_node_page(nid, &fio); 1766 set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(folio)); 1767 dec_page_count(sbi, F2FS_DIRTY_NODES); 1768 f2fs_up_read(&sbi->node_write); 1769 1770 folio_unlock(folio); 1771 1772 if (unlikely(f2fs_cp_error(sbi))) { 1773 f2fs_submit_merged_write(sbi, NODE); 1774 submitted = NULL; 1775 } 1776 if (submitted) 1777 *submitted = fio.submitted; 1778 1779 if (do_balance) 1780 f2fs_balance_fs(sbi, false); 1781 return true; 1782 1783 redirty_out: 1784 folio_redirty_for_writepage(wbc, folio); 1785 folio_unlock(folio); 1786 return false; 1787 } 1788 1789 int f2fs_move_node_folio(struct folio *node_folio, int gc_type) 1790 { 1791 int err = 0; 1792 1793 if (gc_type == FG_GC) { 1794 struct writeback_control wbc = { 1795 .sync_mode = WB_SYNC_ALL, 1796 .nr_to_write = 1, 1797 }; 1798 1799 f2fs_folio_wait_writeback(node_folio, NODE, true, true); 1800 1801 folio_mark_dirty(node_folio); 1802 1803 if (!folio_clear_dirty_for_io(node_folio)) { 1804 err = -EAGAIN; 1805 goto out_page; 1806 } 1807 1808 if (!__write_node_folio(node_folio, false, NULL, 1809 &wbc, false, FS_GC_NODE_IO, NULL)) 1810 err = -EAGAIN; 1811 goto release_page; 1812 } else { 1813 /* set page dirty and write it */ 1814 if (!folio_test_writeback(node_folio)) 1815 folio_mark_dirty(node_folio); 1816 } 1817 out_page: 1818 folio_unlock(node_folio); 1819 release_page: 1820 f2fs_folio_put(node_folio, false); 1821 return err; 1822 } 1823 1824 int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, 1825 struct writeback_control *wbc, bool atomic, 1826 unsigned int *seq_id) 1827 { 1828 pgoff_t index; 1829 struct folio_batch fbatch; 1830 
int ret = 0; 1831 struct folio *last_folio = NULL; 1832 bool marked = false; 1833 nid_t ino = inode->i_ino; 1834 int nr_folios; 1835 int nwritten = 0; 1836 1837 if (atomic) { 1838 last_folio = last_fsync_dnode(sbi, ino); 1839 if (IS_ERR_OR_NULL(last_folio)) 1840 return PTR_ERR_OR_ZERO(last_folio); 1841 } 1842 retry: 1843 folio_batch_init(&fbatch); 1844 index = 0; 1845 1846 while ((nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi), &index, 1847 (pgoff_t)-1, PAGECACHE_TAG_DIRTY, 1848 &fbatch))) { 1849 int i; 1850 1851 for (i = 0; i < nr_folios; i++) { 1852 struct folio *folio = fbatch.folios[i]; 1853 bool submitted = false; 1854 1855 if (unlikely(f2fs_cp_error(sbi))) { 1856 f2fs_folio_put(last_folio, false); 1857 folio_batch_release(&fbatch); 1858 ret = -EIO; 1859 goto out; 1860 } 1861 1862 if (!IS_DNODE(folio) || !is_cold_node(folio)) 1863 continue; 1864 if (ino_of_node(folio) != ino) 1865 continue; 1866 1867 folio_lock(folio); 1868 1869 if (unlikely(!is_node_folio(folio))) { 1870 continue_unlock: 1871 folio_unlock(folio); 1872 continue; 1873 } 1874 if (ino_of_node(folio) != ino) 1875 goto continue_unlock; 1876 1877 if (!folio_test_dirty(folio) && folio != last_folio) { 1878 /* someone wrote it for us */ 1879 goto continue_unlock; 1880 } 1881 1882 f2fs_folio_wait_writeback(folio, NODE, true, true); 1883 1884 set_fsync_mark(folio, 0); 1885 set_dentry_mark(folio, 0); 1886 1887 if (!atomic || folio == last_folio) { 1888 set_fsync_mark(folio, 1); 1889 percpu_counter_inc(&sbi->rf_node_block_count); 1890 if (IS_INODE(folio)) { 1891 if (is_inode_flag_set(inode, 1892 FI_DIRTY_INODE)) 1893 f2fs_update_inode(inode, folio); 1894 set_dentry_mark(folio, 1895 f2fs_need_dentry_mark(sbi, ino)); 1896 } 1897 /* may be written by other thread */ 1898 if (!folio_test_dirty(folio)) 1899 folio_mark_dirty(folio); 1900 } 1901 1902 if (!folio_clear_dirty_for_io(folio)) 1903 goto continue_unlock; 1904 1905 if (!__write_node_folio(folio, atomic && 1906 folio == last_folio, 1907 &submitted, wbc, true, 1908 FS_NODE_IO, seq_id)) { 1909 f2fs_folio_put(last_folio, false); 1910 folio_batch_release(&fbatch); 1911 ret = -EIO; 1912 goto out; 1913 } 1914 if (submitted) 1915 nwritten++; 1916 1917 if (folio == last_folio) { 1918 f2fs_folio_put(folio, false); 1919 folio_batch_release(&fbatch); 1920 marked = true; 1921 goto out; 1922 } 1923 } 1924 folio_batch_release(&fbatch); 1925 cond_resched(); 1926 } 1927 if (atomic && !marked) { 1928 f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx", 1929 ino, last_folio->index); 1930 folio_lock(last_folio); 1931 f2fs_folio_wait_writeback(last_folio, NODE, true, true); 1932 folio_mark_dirty(last_folio); 1933 folio_unlock(last_folio); 1934 goto retry; 1935 } 1936 out: 1937 if (nwritten) 1938 f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE); 1939 return ret; 1940 } 1941 1942 static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data) 1943 { 1944 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1945 bool clean; 1946 1947 if (inode->i_ino != ino) 1948 return 0; 1949 1950 if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) 1951 return 0; 1952 1953 spin_lock(&sbi->inode_lock[DIRTY_META]); 1954 clean = list_empty(&F2FS_I(inode)->gdirty_list); 1955 spin_unlock(&sbi->inode_lock[DIRTY_META]); 1956 1957 if (clean) 1958 return 0; 1959 1960 inode = igrab(inode); 1961 if (!inode) 1962 return 0; 1963 return 1; 1964 } 1965 1966 static bool flush_dirty_inode(struct folio *folio) 1967 { 1968 struct f2fs_sb_info *sbi = F2FS_F_SB(folio); 1969 struct inode *inode; 1970 nid_t ino = 
ino_of_node(folio); 1971 1972 inode = find_inode_nowait(sbi->sb, ino, f2fs_match_ino, NULL); 1973 if (!inode) 1974 return false; 1975 1976 f2fs_update_inode(inode, folio); 1977 folio_unlock(folio); 1978 1979 iput(inode); 1980 return true; 1981 } 1982 1983 void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) 1984 { 1985 pgoff_t index = 0; 1986 struct folio_batch fbatch; 1987 int nr_folios; 1988 1989 folio_batch_init(&fbatch); 1990 1991 while ((nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi), &index, 1992 (pgoff_t)-1, PAGECACHE_TAG_DIRTY, 1993 &fbatch))) { 1994 int i; 1995 1996 for (i = 0; i < nr_folios; i++) { 1997 struct folio *folio = fbatch.folios[i]; 1998 1999 if (!IS_INODE(folio)) 2000 continue; 2001 2002 folio_lock(folio); 2003 2004 if (unlikely(!is_node_folio(folio))) 2005 goto unlock; 2006 if (!folio_test_dirty(folio)) 2007 goto unlock; 2008 2009 /* flush inline_data, if it's async context. */ 2010 if (folio_test_f2fs_inline(folio)) { 2011 folio_clear_f2fs_inline(folio); 2012 folio_unlock(folio); 2013 flush_inline_data(sbi, ino_of_node(folio)); 2014 continue; 2015 } 2016 unlock: 2017 folio_unlock(folio); 2018 } 2019 folio_batch_release(&fbatch); 2020 cond_resched(); 2021 } 2022 } 2023 2024 int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, 2025 struct writeback_control *wbc, 2026 bool do_balance, enum iostat_type io_type) 2027 { 2028 pgoff_t index; 2029 struct folio_batch fbatch; 2030 int step = 0; 2031 int nwritten = 0; 2032 int ret = 0; 2033 int nr_folios, done = 0; 2034 2035 folio_batch_init(&fbatch); 2036 2037 next_step: 2038 index = 0; 2039 2040 while (!done && (nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi), 2041 &index, (pgoff_t)-1, PAGECACHE_TAG_DIRTY, 2042 &fbatch))) { 2043 int i; 2044 2045 for (i = 0; i < nr_folios; i++) { 2046 struct folio *folio = fbatch.folios[i]; 2047 bool submitted = false; 2048 2049 /* give a priority to WB_SYNC threads */ 2050 if (atomic_read(&sbi->wb_sync_req[NODE]) && 2051 wbc->sync_mode == WB_SYNC_NONE) { 2052 done = 1; 2053 break; 2054 } 2055 2056 /* 2057 * flushing sequence with step: 2058 * 0. indirect nodes 2059 * 1. dentry dnodes 2060 * 2. file dnodes 2061 */ 2062 if (step == 0 && IS_DNODE(folio)) 2063 continue; 2064 if (step == 1 && (!IS_DNODE(folio) || 2065 is_cold_node(folio))) 2066 continue; 2067 if (step == 2 && (!IS_DNODE(folio) || 2068 !is_cold_node(folio))) 2069 continue; 2070 lock_node: 2071 if (wbc->sync_mode == WB_SYNC_ALL) 2072 folio_lock(folio); 2073 else if (!folio_trylock(folio)) 2074 continue; 2075 2076 if (unlikely(!is_node_folio(folio))) { 2077 continue_unlock: 2078 folio_unlock(folio); 2079 continue; 2080 } 2081 2082 if (!folio_test_dirty(folio)) { 2083 /* someone wrote it for us */ 2084 goto continue_unlock; 2085 } 2086 2087 /* flush inline_data/inode, if it's async context. 
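* Callers that pass do_balance == false (e.g. checkpoint writeback) skip straight to writing the node block.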
*/ 2088 if (!do_balance) 2089 goto write_node; 2090 2091 /* flush inline_data */ 2092 if (folio_test_f2fs_inline(folio)) { 2093 folio_clear_f2fs_inline(folio); 2094 folio_unlock(folio); 2095 flush_inline_data(sbi, ino_of_node(folio)); 2096 goto lock_node; 2097 } 2098 2099 /* flush dirty inode */ 2100 if (IS_INODE(folio) && flush_dirty_inode(folio)) 2101 goto lock_node; 2102 write_node: 2103 f2fs_folio_wait_writeback(folio, NODE, true, true); 2104 2105 if (!folio_clear_dirty_for_io(folio)) 2106 goto continue_unlock; 2107 2108 set_fsync_mark(folio, 0); 2109 set_dentry_mark(folio, 0); 2110 2111 if (!__write_node_folio(folio, false, &submitted, 2112 wbc, do_balance, io_type, NULL)) { 2113 folio_batch_release(&fbatch); 2114 ret = -EIO; 2115 goto out; 2116 } 2117 if (submitted) 2118 nwritten++; 2119 2120 if (--wbc->nr_to_write == 0) 2121 break; 2122 } 2123 folio_batch_release(&fbatch); 2124 cond_resched(); 2125 2126 if (wbc->nr_to_write == 0) { 2127 step = 2; 2128 break; 2129 } 2130 } 2131 2132 if (step < 2) { 2133 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) && 2134 wbc->sync_mode == WB_SYNC_NONE && step == 1) 2135 goto out; 2136 step++; 2137 goto next_step; 2138 } 2139 out: 2140 if (nwritten) 2141 f2fs_submit_merged_write(sbi, NODE); 2142 2143 if (unlikely(f2fs_cp_error(sbi))) 2144 return -EIO; 2145 return ret; 2146 } 2147 2148 int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, 2149 unsigned int seq_id) 2150 { 2151 struct fsync_node_entry *fn; 2152 struct list_head *head = &sbi->fsync_node_list; 2153 unsigned long flags; 2154 unsigned int cur_seq_id = 0; 2155 2156 while (seq_id && cur_seq_id < seq_id) { 2157 struct folio *folio; 2158 2159 spin_lock_irqsave(&sbi->fsync_node_lock, flags); 2160 if (list_empty(head)) { 2161 spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); 2162 break; 2163 } 2164 fn = list_first_entry(head, struct fsync_node_entry, list); 2165 if (fn->seq_id > seq_id) { 2166 spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); 2167 break; 2168 } 2169 cur_seq_id = fn->seq_id; 2170 folio = fn->folio; 2171 folio_get(folio); 2172 spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); 2173 2174 f2fs_folio_wait_writeback(folio, NODE, true, false); 2175 2176 folio_put(folio); 2177 } 2178 2179 return filemap_check_errors(NODE_MAPPING(sbi)); 2180 } 2181 2182 static int f2fs_write_node_pages(struct address_space *mapping, 2183 struct writeback_control *wbc) 2184 { 2185 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); 2186 struct blk_plug plug; 2187 long diff; 2188 2189 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 2190 goto skip_write; 2191 2192 /* balancing f2fs's metadata in background */ 2193 f2fs_balance_fs_bg(sbi, true); 2194 2195 /* collect a number of dirty node pages and write together */ 2196 if (wbc->sync_mode != WB_SYNC_ALL && 2197 get_pages(sbi, F2FS_DIRTY_NODES) < 2198 nr_pages_to_skip(sbi, NODE)) 2199 goto skip_write; 2200 2201 if (wbc->sync_mode == WB_SYNC_ALL) 2202 atomic_inc(&sbi->wb_sync_req[NODE]); 2203 else if (atomic_read(&sbi->wb_sync_req[NODE])) { 2204 /* to avoid potential deadlock */ 2205 if (current->plug) 2206 blk_finish_plug(current->plug); 2207 goto skip_write; 2208 } 2209 2210 trace_f2fs_writepages(mapping->host, wbc, NODE); 2211 2212 diff = nr_pages_to_write(sbi, NODE, wbc); 2213 blk_start_plug(&plug); 2214 f2fs_sync_node_pages(sbi, wbc, true, FS_NODE_IO); 2215 blk_finish_plug(&plug); 2216 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); 2217 2218 if (wbc->sync_mode == WB_SYNC_ALL) 2219 atomic_dec(&sbi->wb_sync_req[NODE]); 2220 
return 0; 2221 2222 skip_write: 2223 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES); 2224 trace_f2fs_writepages(mapping->host, wbc, NODE); 2225 return 0; 2226 } 2227 2228 static bool f2fs_dirty_node_folio(struct address_space *mapping, 2229 struct folio *folio) 2230 { 2231 trace_f2fs_set_page_dirty(folio, NODE); 2232 2233 if (!folio_test_uptodate(folio)) 2234 folio_mark_uptodate(folio); 2235 #ifdef CONFIG_F2FS_CHECK_FS 2236 if (IS_INODE(folio)) 2237 f2fs_inode_chksum_set(F2FS_M_SB(mapping), folio); 2238 #endif 2239 if (filemap_dirty_folio(mapping, folio)) { 2240 inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); 2241 folio_set_f2fs_reference(folio); 2242 return true; 2243 } 2244 return false; 2245 } 2246 2247 /* 2248 * Structure of the f2fs node operations 2249 */ 2250 const struct address_space_operations f2fs_node_aops = { 2251 .writepages = f2fs_write_node_pages, 2252 .dirty_folio = f2fs_dirty_node_folio, 2253 .invalidate_folio = f2fs_invalidate_folio, 2254 .release_folio = f2fs_release_folio, 2255 .migrate_folio = filemap_migrate_folio, 2256 }; 2257 2258 static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, 2259 nid_t n) 2260 { 2261 return radix_tree_lookup(&nm_i->free_nid_root, n); 2262 } 2263 2264 static int __insert_free_nid(struct f2fs_sb_info *sbi, 2265 struct free_nid *i) 2266 { 2267 struct f2fs_nm_info *nm_i = NM_I(sbi); 2268 int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i); 2269 2270 if (err) 2271 return err; 2272 2273 nm_i->nid_cnt[FREE_NID]++; 2274 list_add_tail(&i->list, &nm_i->free_nid_list); 2275 return 0; 2276 } 2277 2278 static void __remove_free_nid(struct f2fs_sb_info *sbi, 2279 struct free_nid *i, enum nid_state state) 2280 { 2281 struct f2fs_nm_info *nm_i = NM_I(sbi); 2282 2283 f2fs_bug_on(sbi, state != i->state); 2284 nm_i->nid_cnt[state]--; 2285 if (state == FREE_NID) 2286 list_del(&i->list); 2287 radix_tree_delete(&nm_i->free_nid_root, i->nid); 2288 } 2289 2290 static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i, 2291 enum nid_state org_state, enum nid_state dst_state) 2292 { 2293 struct f2fs_nm_info *nm_i = NM_I(sbi); 2294 2295 f2fs_bug_on(sbi, org_state != i->state); 2296 i->state = dst_state; 2297 nm_i->nid_cnt[org_state]--; 2298 nm_i->nid_cnt[dst_state]++; 2299 2300 switch (dst_state) { 2301 case PREALLOC_NID: 2302 list_del(&i->list); 2303 break; 2304 case FREE_NID: 2305 list_add_tail(&i->list, &nm_i->free_nid_list); 2306 break; 2307 default: 2308 BUG_ON(1); 2309 } 2310 } 2311 2312 static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, 2313 bool set, bool build) 2314 { 2315 struct f2fs_nm_info *nm_i = NM_I(sbi); 2316 unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); 2317 unsigned int nid_ofs = nid - START_NID(nid); 2318 2319 if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) 2320 return; 2321 2322 if (set) { 2323 if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs])) 2324 return; 2325 __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); 2326 nm_i->free_nid_count[nat_ofs]++; 2327 } else { 2328 if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs])) 2329 return; 2330 __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); 2331 if (!build) 2332 nm_i->free_nid_count[nat_ofs]--; 2333 } 2334 } 2335 2336 /* return if the nid is recognized as free */ 2337 static bool add_free_nid(struct f2fs_sb_info *sbi, 2338 nid_t nid, bool build, bool update) 2339 { 2340 struct f2fs_nm_info *nm_i = NM_I(sbi); 2341 struct free_nid *i, *e; 2342 struct nat_entry *ne; 2343 int err; 2344 bool ret = 
		false;

	/* 0 nid should not be used */
	if (unlikely(nid == 0))
		return false;

	if (unlikely(f2fs_check_nid_range(sbi, nid)))
		return false;

	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS, true, NULL);
	i->nid = nid;
	i->state = FREE_NID;

	err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
	f2fs_bug_on(sbi, err);

	err = -EINVAL;

	spin_lock(&nm_i->nid_list_lock);

	if (build) {
		/*
		 *	Thread A		Thread B
		 *  - f2fs_create
		 *   - f2fs_new_inode
		 *    - f2fs_alloc_nid
		 *     - __insert_nid_to_list(PREALLOC_NID)
		 *			- f2fs_balance_fs_bg
		 *			 - f2fs_build_free_nids
		 *			  - __f2fs_build_free_nids
		 *			   - scan_nat_page
		 *			    - add_free_nid
		 *			     - __lookup_nat_cache
		 *  - f2fs_add_link
		 *   - f2fs_init_inode_metadata
		 *    - f2fs_new_inode_folio
		 *     - f2fs_new_node_folio
		 *      - set_node_addr
		 *  - f2fs_alloc_nid_done
		 *   - __remove_nid_from_list(PREALLOC_NID)
		 *			     - __insert_nid_to_list(FREE_NID)
		 */
		ne = __lookup_nat_cache(nm_i, nid, false);
		if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
				nat_get_blkaddr(ne) != NULL_ADDR))
			goto err_out;

		e = __lookup_free_nid_list(nm_i, nid);
		if (e) {
			if (e->state == FREE_NID)
				ret = true;
			goto err_out;
		}
	}
	ret = true;
	err = __insert_free_nid(sbi, i);
err_out:
	if (update) {
		update_free_nid_bitmap(sbi, nid, ret, build);
		if (!build)
			nm_i->available_nids++;
	}
	spin_unlock(&nm_i->nid_list_lock);
	radix_tree_preload_end();

	if (err)
		kmem_cache_free(free_nid_slab, i);
	return ret;
}

static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;
	bool need_free = false;

	spin_lock(&nm_i->nid_list_lock);
	i = __lookup_free_nid_list(nm_i, nid);
	if (i && i->state == FREE_NID) {
		__remove_free_nid(sbi, i, FREE_NID);
		need_free = true;
	}
	spin_unlock(&nm_i->nid_list_lock);

	if (need_free)
		kmem_cache_free(free_nid_slab, i);
}

static int scan_nat_page(struct f2fs_sb_info *sbi,
			struct f2fs_nat_block *nat_blk, nid_t start_nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	block_t blk_addr;
	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
	int i;

	__set_bit_le(nat_ofs, nm_i->nat_block_bitmap);

	i = start_nid % NAT_ENTRY_PER_BLOCK;

	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
		if (unlikely(start_nid >= nm_i->max_nid))
			break;

		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);

		if (blk_addr == NEW_ADDR)
			return -EFSCORRUPTED;

		if (blk_addr == NULL_ADDR) {
			add_free_nid(sbi, start_nid, true, true);
		} else {
			spin_lock(&NM_I(sbi)->nid_list_lock);
			update_free_nid_bitmap(sbi, start_nid, false, true);
			spin_unlock(&NM_I(sbi)->nid_list_lock);
		}
	}

	return 0;
}
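/*
 * A sketch of the journal scan that follows (derived from the loop body
 * below, not an extra code path): the hot data curseg keeps a small NAT
 * journal, so a nid whose most recent state exists only in that journal
 * has to be classified from there rather than from the NAT blocks:
 *
 *	block_addr == NULL_ADDR -> the nid has been freed, add_free_nid()
 *	block_addr != NULL_ADDR -> the nid is in use, remove_free_nid()
 */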
static void scan_curseg_cache(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	int i;

	down_read(&curseg->journal_rwsem);
	for (i = 0; i < nats_in_cursum(journal); i++) {
		block_t addr;
		nid_t nid;

		addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
		nid = le32_to_cpu(nid_in_journal(journal, i));
		if (addr == NULL_ADDR)
			add_free_nid(sbi, nid, true, false);
		else
			remove_free_nid(sbi, nid);
	}
	up_read(&curseg->journal_rwsem);
}

static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int i, idx;
	nid_t nid;

	f2fs_down_read(&nm_i->nat_tree_lock);

	for (i = 0; i < nm_i->nat_blocks; i++) {
		if (!test_bit_le(i, nm_i->nat_block_bitmap))
			continue;
		if (!nm_i->free_nid_count[i])
			continue;
		for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
			idx = find_next_bit_le(nm_i->free_nid_bitmap[i],
						NAT_ENTRY_PER_BLOCK, idx);
			if (idx >= NAT_ENTRY_PER_BLOCK)
				break;

			nid = i * NAT_ENTRY_PER_BLOCK + idx;
			add_free_nid(sbi, nid, true, false);

			if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS)
				goto out;
		}
	}
out:
	scan_curseg_cache(sbi);

	f2fs_up_read(&nm_i->nat_tree_lock);
}

static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
						bool sync, bool mount)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	int i = 0, ret;
	nid_t nid = nm_i->next_scan_nid;

	if (unlikely(nid >= nm_i->max_nid))
		nid = 0;

	if (unlikely(nid % NAT_ENTRY_PER_BLOCK))
		nid = NAT_BLOCK_OFFSET(nid) * NAT_ENTRY_PER_BLOCK;

	/* Enough entries */
	if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
		return 0;

	if (!sync && !f2fs_available_free_memory(sbi, FREE_NIDS))
		return 0;

	if (!mount) {
		/* try to find free nids in free_nid_bitmap */
		scan_free_nid_bits(sbi);

		if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
			return 0;
	}

	/* readahead nat pages to be scanned */
	f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
							META_NAT, true);

	f2fs_down_read(&nm_i->nat_tree_lock);

	while (1) {
		if (!test_bit_le(NAT_BLOCK_OFFSET(nid),
						nm_i->nat_block_bitmap)) {
			struct folio *folio = get_current_nat_folio(sbi, nid);

			if (IS_ERR(folio)) {
				ret = PTR_ERR(folio);
			} else {
				ret = scan_nat_page(sbi, folio_address(folio),
						nid);
				f2fs_folio_put(folio, true);
			}

			if (ret) {
				f2fs_up_read(&nm_i->nat_tree_lock);

				if (ret == -EFSCORRUPTED) {
					f2fs_err(sbi, "NAT is corrupt, run fsck to fix it");
					set_sbi_flag(sbi, SBI_NEED_FSCK);
					f2fs_handle_error(sbi,
						ERROR_INCONSISTENT_NAT);
				}

				return ret;
			}
		}

		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
		if (unlikely(nid >= nm_i->max_nid))
			nid = 0;

		if (++i >= FREE_NID_PAGES)
			break;
	}

	/* go to the next free nat pages to find free nids abundantly */
	nm_i->next_scan_nid = nid;

	/* find free nids from current sum_pages */
	scan_curseg_cache(sbi);

	f2fs_up_read(&nm_i->nat_tree_lock);

	f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
					nm_i->ra_nid_pages, META_NAT, false);

	return 0;
}

int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
	int ret;

	mutex_lock(&NM_I(sbi)->build_lock);
	ret = __f2fs_build_free_nids(sbi, sync, mount);
	mutex_unlock(&NM_I(sbi)->build_lock);

	return ret;
}
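/*
 * Expected calling pattern for the nid allocation helpers below, sketched
 * from their own comments (the caller shown here is only an illustrative
 * assumption, e.g. inode creation or xattr node recovery):
 *
 *	nid_t nid;
 *
 *	if (!f2fs_alloc_nid(sbi, &nid))
 *		return -ENOSPC;
 *	... allocate and initialize the new node block for nid ...
 *	if (succeeded)
 *		f2fs_alloc_nid_done(sbi, nid);   // drop the PREALLOC_NID entry
 *	else
 *		f2fs_alloc_nid_failed(sbi, nid); // give the nid back
 */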
/*
 * If this function returns success, caller can obtain a new nid
 * from second parameter of this function.
 * The returned nid could be used as an ino as well as a nid when an inode
 * is created.
 */
bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i = NULL;
retry:
	if (time_to_inject(sbi, FAULT_ALLOC_NID))
		return false;

	spin_lock(&nm_i->nid_list_lock);

	if (unlikely(nm_i->available_nids == 0)) {
		spin_unlock(&nm_i->nid_list_lock);
		return false;
	}

	/* We should not use stale free nids created by f2fs_build_free_nids */
	if (nm_i->nid_cnt[FREE_NID] && !on_f2fs_build_free_nids(nm_i)) {
		f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
		i = list_first_entry(&nm_i->free_nid_list,
					struct free_nid, list);
		*nid = i->nid;

		__move_free_nid(sbi, i, FREE_NID, PREALLOC_NID);
		nm_i->available_nids--;

		update_free_nid_bitmap(sbi, *nid, false, false);

		spin_unlock(&nm_i->nid_list_lock);
		return true;
	}
	spin_unlock(&nm_i->nid_list_lock);

	/* Let's scan nat pages and their caches to get free nids */
	if (!f2fs_build_free_nids(sbi, true, false))
		goto retry;
	return false;
}

/*
 * f2fs_alloc_nid() should be called prior to this function.
 */
void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;

	spin_lock(&nm_i->nid_list_lock);
	i = __lookup_free_nid_list(nm_i, nid);
	f2fs_bug_on(sbi, !i);
	__remove_free_nid(sbi, i, PREALLOC_NID);
	spin_unlock(&nm_i->nid_list_lock);

	kmem_cache_free(free_nid_slab, i);
}

/*
 * f2fs_alloc_nid() should be called prior to this function.
 */
void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;
	bool need_free = false;

	if (!nid)
		return;

	spin_lock(&nm_i->nid_list_lock);
	i = __lookup_free_nid_list(nm_i, nid);
	f2fs_bug_on(sbi, !i);

	if (!f2fs_available_free_memory(sbi, FREE_NIDS)) {
		__remove_free_nid(sbi, i, PREALLOC_NID);
		need_free = true;
	} else {
		__move_free_nid(sbi, i, PREALLOC_NID, FREE_NID);
	}

	nm_i->available_nids++;

	update_free_nid_bitmap(sbi, nid, true, false);

	spin_unlock(&nm_i->nid_list_lock);

	if (need_free)
		kmem_cache_free(free_nid_slab, i);
}

int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	int nr = nr_shrink;

	if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
		return 0;

	if (!mutex_trylock(&nm_i->build_lock))
		return 0;

	while (nr_shrink && nm_i->nid_cnt[FREE_NID] > MAX_FREE_NIDS) {
		struct free_nid *i, *next;
		unsigned int batch = SHRINK_NID_BATCH_SIZE;

		spin_lock(&nm_i->nid_list_lock);
		list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
			if (!nr_shrink || !batch ||
				nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
				break;
			__remove_free_nid(sbi, i, FREE_NID);
			kmem_cache_free(free_nid_slab, i);
			nr_shrink--;
			batch--;
		}
		spin_unlock(&nm_i->nid_list_lock);
	}

	mutex_unlock(&nm_i->build_lock);

	return nr - nr_shrink;
}

int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio)
{
	void *src_addr, *dst_addr;
	size_t inline_size;
	struct
folio *ifolio; 2743 struct f2fs_inode *ri; 2744 2745 ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); 2746 if (IS_ERR(ifolio)) 2747 return PTR_ERR(ifolio); 2748 2749 ri = F2FS_INODE(folio); 2750 if (ri->i_inline & F2FS_INLINE_XATTR) { 2751 if (!f2fs_has_inline_xattr(inode)) { 2752 set_inode_flag(inode, FI_INLINE_XATTR); 2753 stat_inc_inline_xattr(inode); 2754 } 2755 } else { 2756 if (f2fs_has_inline_xattr(inode)) { 2757 stat_dec_inline_xattr(inode); 2758 clear_inode_flag(inode, FI_INLINE_XATTR); 2759 } 2760 goto update_inode; 2761 } 2762 2763 dst_addr = inline_xattr_addr(inode, ifolio); 2764 src_addr = inline_xattr_addr(inode, folio); 2765 inline_size = inline_xattr_size(inode); 2766 2767 f2fs_folio_wait_writeback(ifolio, NODE, true, true); 2768 memcpy(dst_addr, src_addr, inline_size); 2769 update_inode: 2770 f2fs_update_inode(inode, ifolio); 2771 f2fs_folio_put(ifolio, true); 2772 return 0; 2773 } 2774 2775 int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio) 2776 { 2777 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2778 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; 2779 nid_t new_xnid; 2780 struct dnode_of_data dn; 2781 struct node_info ni; 2782 struct folio *xfolio; 2783 int err; 2784 2785 if (!prev_xnid) 2786 goto recover_xnid; 2787 2788 /* 1: invalidate the previous xattr nid */ 2789 err = f2fs_get_node_info(sbi, prev_xnid, &ni, false); 2790 if (err) 2791 return err; 2792 2793 f2fs_invalidate_blocks(sbi, ni.blk_addr, 1); 2794 dec_valid_node_count(sbi, inode, false); 2795 set_node_addr(sbi, &ni, NULL_ADDR, false); 2796 2797 recover_xnid: 2798 /* 2: update xattr nid in inode */ 2799 if (!f2fs_alloc_nid(sbi, &new_xnid)) 2800 return -ENOSPC; 2801 2802 set_new_dnode(&dn, inode, NULL, NULL, new_xnid); 2803 xfolio = f2fs_new_node_folio(&dn, XATTR_NODE_OFFSET); 2804 if (IS_ERR(xfolio)) { 2805 f2fs_alloc_nid_failed(sbi, new_xnid); 2806 return PTR_ERR(xfolio); 2807 } 2808 2809 f2fs_alloc_nid_done(sbi, new_xnid); 2810 f2fs_update_inode_page(inode); 2811 2812 /* 3: update and set xattr node page dirty */ 2813 if (folio) { 2814 memcpy(F2FS_NODE(xfolio), F2FS_NODE(folio), 2815 VALID_XATTR_BLOCK_SIZE); 2816 folio_mark_dirty(xfolio); 2817 } 2818 f2fs_folio_put(xfolio, true); 2819 2820 return 0; 2821 } 2822 2823 int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio) 2824 { 2825 struct f2fs_inode *src, *dst; 2826 nid_t ino = ino_of_node(folio); 2827 struct node_info old_ni, new_ni; 2828 struct folio *ifolio; 2829 int err; 2830 2831 err = f2fs_get_node_info(sbi, ino, &old_ni, false); 2832 if (err) 2833 return err; 2834 2835 if (unlikely(old_ni.blk_addr != NULL_ADDR)) 2836 return -EINVAL; 2837 retry: 2838 ifolio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), ino, false); 2839 if (IS_ERR(ifolio)) { 2840 memalloc_retry_wait(GFP_NOFS); 2841 goto retry; 2842 } 2843 2844 /* Should not use this inode from free nid list */ 2845 remove_free_nid(sbi, ino); 2846 2847 if (!folio_test_uptodate(ifolio)) 2848 folio_mark_uptodate(ifolio); 2849 fill_node_footer(ifolio, ino, ino, 0, true); 2850 set_cold_node(ifolio, false); 2851 2852 src = F2FS_INODE(folio); 2853 dst = F2FS_INODE(ifolio); 2854 2855 memcpy(dst, src, offsetof(struct f2fs_inode, i_ext)); 2856 dst->i_size = 0; 2857 dst->i_blocks = cpu_to_le64(1); 2858 dst->i_links = cpu_to_le32(1); 2859 dst->i_xattr_nid = 0; 2860 dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR); 2861 if (dst->i_inline & F2FS_EXTRA_ATTR) { 2862 dst->i_extra_isize = src->i_extra_isize; 2863 2864 if 
(f2fs_sb_has_flexible_inline_xattr(sbi) && 2865 F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), 2866 i_inline_xattr_size)) 2867 dst->i_inline_xattr_size = src->i_inline_xattr_size; 2868 2869 if (f2fs_sb_has_project_quota(sbi) && 2870 F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), 2871 i_projid)) 2872 dst->i_projid = src->i_projid; 2873 2874 if (f2fs_sb_has_inode_crtime(sbi) && 2875 F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), 2876 i_crtime_nsec)) { 2877 dst->i_crtime = src->i_crtime; 2878 dst->i_crtime_nsec = src->i_crtime_nsec; 2879 } 2880 } 2881 2882 new_ni = old_ni; 2883 new_ni.ino = ino; 2884 2885 if (unlikely(inc_valid_node_count(sbi, NULL, true))) 2886 WARN_ON(1); 2887 set_node_addr(sbi, &new_ni, NEW_ADDR, false); 2888 inc_valid_inode_count(sbi); 2889 folio_mark_dirty(ifolio); 2890 f2fs_folio_put(ifolio, true); 2891 return 0; 2892 } 2893 2894 int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, 2895 unsigned int segno, struct f2fs_summary_block *sum) 2896 { 2897 struct f2fs_node *rn; 2898 struct f2fs_summary *sum_entry; 2899 block_t addr; 2900 int i, idx, last_offset, nrpages; 2901 2902 /* scan the node segment */ 2903 last_offset = BLKS_PER_SEG(sbi); 2904 addr = START_BLOCK(sbi, segno); 2905 sum_entry = &sum->entries[0]; 2906 2907 for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { 2908 nrpages = bio_max_segs(last_offset - i); 2909 2910 /* readahead node pages */ 2911 f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true); 2912 2913 for (idx = addr; idx < addr + nrpages; idx++) { 2914 struct folio *folio = f2fs_get_tmp_folio(sbi, idx); 2915 2916 if (IS_ERR(folio)) 2917 return PTR_ERR(folio); 2918 2919 rn = F2FS_NODE(folio); 2920 sum_entry->nid = rn->footer.nid; 2921 sum_entry->version = 0; 2922 sum_entry->ofs_in_node = 0; 2923 sum_entry++; 2924 f2fs_folio_put(folio, true); 2925 } 2926 2927 invalidate_mapping_pages(META_MAPPING(sbi), addr, 2928 addr + nrpages); 2929 } 2930 return 0; 2931 } 2932 2933 static void remove_nats_in_journal(struct f2fs_sb_info *sbi) 2934 { 2935 struct f2fs_nm_info *nm_i = NM_I(sbi); 2936 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 2937 struct f2fs_journal *journal = curseg->journal; 2938 int i; 2939 bool init_dirty; 2940 2941 down_write(&curseg->journal_rwsem); 2942 for (i = 0; i < nats_in_cursum(journal); i++) { 2943 struct nat_entry *ne; 2944 struct f2fs_nat_entry raw_ne; 2945 nid_t nid = le32_to_cpu(nid_in_journal(journal, i)); 2946 2947 if (f2fs_check_nid_range(sbi, nid)) 2948 continue; 2949 2950 init_dirty = false; 2951 2952 raw_ne = nat_in_journal(journal, i); 2953 2954 ne = __lookup_nat_cache(nm_i, nid, true); 2955 if (!ne) { 2956 init_dirty = true; 2957 ne = __alloc_nat_entry(sbi, nid, true); 2958 __init_nat_entry(nm_i, ne, &raw_ne, true, true); 2959 } 2960 2961 /* 2962 * if a free nat in journal has not been used after last 2963 * checkpoint, we should remove it from available nids, 2964 * since later we will add it again. 
2965 */ 2966 if (!get_nat_flag(ne, IS_DIRTY) && 2967 le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) { 2968 spin_lock(&nm_i->nid_list_lock); 2969 nm_i->available_nids--; 2970 spin_unlock(&nm_i->nid_list_lock); 2971 } 2972 2973 __set_nat_cache_dirty(nm_i, ne, init_dirty); 2974 } 2975 update_nats_in_cursum(journal, -i); 2976 up_write(&curseg->journal_rwsem); 2977 } 2978 2979 static void __adjust_nat_entry_set(struct nat_entry_set *nes, 2980 struct list_head *head, int max) 2981 { 2982 struct nat_entry_set *cur; 2983 2984 if (nes->entry_cnt >= max) 2985 goto add_out; 2986 2987 list_for_each_entry(cur, head, set_list) { 2988 if (cur->entry_cnt >= nes->entry_cnt) { 2989 list_add(&nes->set_list, cur->set_list.prev); 2990 return; 2991 } 2992 } 2993 add_out: 2994 list_add_tail(&nes->set_list, head); 2995 } 2996 2997 static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, 2998 const struct f2fs_nat_block *nat_blk) 2999 { 3000 struct f2fs_nm_info *nm_i = NM_I(sbi); 3001 unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK; 3002 int valid = 0; 3003 int i = 0; 3004 3005 if (!enabled_nat_bits(sbi, NULL)) 3006 return; 3007 3008 if (nat_index == 0) { 3009 valid = 1; 3010 i = 1; 3011 } 3012 for (; i < NAT_ENTRY_PER_BLOCK; i++) { 3013 if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR) 3014 valid++; 3015 } 3016 if (valid == 0) { 3017 __set_bit_le(nat_index, nm_i->empty_nat_bits); 3018 __clear_bit_le(nat_index, nm_i->full_nat_bits); 3019 return; 3020 } 3021 3022 __clear_bit_le(nat_index, nm_i->empty_nat_bits); 3023 if (valid == NAT_ENTRY_PER_BLOCK) 3024 __set_bit_le(nat_index, nm_i->full_nat_bits); 3025 else 3026 __clear_bit_le(nat_index, nm_i->full_nat_bits); 3027 } 3028 3029 static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, 3030 struct nat_entry_set *set, struct cp_control *cpc) 3031 { 3032 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 3033 struct f2fs_journal *journal = curseg->journal; 3034 nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK; 3035 bool to_journal = true; 3036 struct f2fs_nat_block *nat_blk; 3037 struct nat_entry *ne, *cur; 3038 struct folio *folio = NULL; 3039 3040 /* 3041 * there are two steps to flush nat entries: 3042 * #1, flush nat entries to journal in current hot data summary block. 3043 * #2, flush nat entries to nat page. 
3044 */ 3045 if (enabled_nat_bits(sbi, cpc) || 3046 !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL)) 3047 to_journal = false; 3048 3049 if (to_journal) { 3050 down_write(&curseg->journal_rwsem); 3051 } else { 3052 folio = get_next_nat_folio(sbi, start_nid); 3053 if (IS_ERR(folio)) 3054 return PTR_ERR(folio); 3055 3056 nat_blk = folio_address(folio); 3057 f2fs_bug_on(sbi, !nat_blk); 3058 } 3059 3060 /* flush dirty nats in nat entry set */ 3061 list_for_each_entry_safe(ne, cur, &set->entry_list, list) { 3062 struct f2fs_nat_entry *raw_ne; 3063 nid_t nid = nat_get_nid(ne); 3064 int offset; 3065 3066 f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR); 3067 3068 if (to_journal) { 3069 offset = f2fs_lookup_journal_in_cursum(journal, 3070 NAT_JOURNAL, nid, 1); 3071 f2fs_bug_on(sbi, offset < 0); 3072 raw_ne = &nat_in_journal(journal, offset); 3073 nid_in_journal(journal, offset) = cpu_to_le32(nid); 3074 } else { 3075 raw_ne = &nat_blk->entries[nid - start_nid]; 3076 } 3077 raw_nat_from_node_info(raw_ne, &ne->ni); 3078 nat_reset_flag(ne); 3079 __clear_nat_cache_dirty(NM_I(sbi), set, ne); 3080 if (nat_get_blkaddr(ne) == NULL_ADDR) { 3081 add_free_nid(sbi, nid, false, true); 3082 } else { 3083 spin_lock(&NM_I(sbi)->nid_list_lock); 3084 update_free_nid_bitmap(sbi, nid, false, false); 3085 spin_unlock(&NM_I(sbi)->nid_list_lock); 3086 } 3087 } 3088 3089 if (to_journal) { 3090 up_write(&curseg->journal_rwsem); 3091 } else { 3092 __update_nat_bits(sbi, start_nid, nat_blk); 3093 f2fs_folio_put(folio, true); 3094 } 3095 3096 /* Allow dirty nats by node block allocation in write_begin */ 3097 if (!set->entry_cnt) { 3098 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); 3099 kmem_cache_free(nat_entry_set_slab, set); 3100 } 3101 return 0; 3102 } 3103 3104 /* 3105 * This function is called during the checkpointing process. 3106 */ 3107 int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) 3108 { 3109 struct f2fs_nm_info *nm_i = NM_I(sbi); 3110 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 3111 struct f2fs_journal *journal = curseg->journal; 3112 struct nat_entry_set *setvec[NAT_VEC_SIZE]; 3113 struct nat_entry_set *set, *tmp; 3114 unsigned int found; 3115 nid_t set_idx = 0; 3116 LIST_HEAD(sets); 3117 int err = 0; 3118 3119 /* 3120 * during unmount, let's flush nat_bits before checking 3121 * nat_cnt[DIRTY_NAT]. 3122 */ 3123 if (enabled_nat_bits(sbi, cpc)) { 3124 f2fs_down_write(&nm_i->nat_tree_lock); 3125 remove_nats_in_journal(sbi); 3126 f2fs_up_write(&nm_i->nat_tree_lock); 3127 } 3128 3129 if (!nm_i->nat_cnt[DIRTY_NAT]) 3130 return 0; 3131 3132 f2fs_down_write(&nm_i->nat_tree_lock); 3133 3134 /* 3135 * if there are no enough space in journal to store dirty nat 3136 * entries, remove all entries from journal and merge them 3137 * into nat entry set. 
3138 */ 3139 if (enabled_nat_bits(sbi, cpc) || 3140 !__has_cursum_space(journal, 3141 nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL)) 3142 remove_nats_in_journal(sbi); 3143 3144 while ((found = __gang_lookup_nat_set(nm_i, 3145 set_idx, NAT_VEC_SIZE, setvec))) { 3146 unsigned idx; 3147 3148 set_idx = setvec[found - 1]->set + 1; 3149 for (idx = 0; idx < found; idx++) 3150 __adjust_nat_entry_set(setvec[idx], &sets, 3151 MAX_NAT_JENTRIES(journal)); 3152 } 3153 3154 /* flush dirty nats in nat entry set */ 3155 list_for_each_entry_safe(set, tmp, &sets, set_list) { 3156 err = __flush_nat_entry_set(sbi, set, cpc); 3157 if (err) 3158 break; 3159 } 3160 3161 f2fs_up_write(&nm_i->nat_tree_lock); 3162 /* Allow dirty nats by node block allocation in write_begin */ 3163 3164 return err; 3165 } 3166 3167 static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) 3168 { 3169 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 3170 struct f2fs_nm_info *nm_i = NM_I(sbi); 3171 unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE; 3172 unsigned int i; 3173 __u64 cp_ver = cur_cp_version(ckpt); 3174 block_t nat_bits_addr; 3175 3176 if (!enabled_nat_bits(sbi, NULL)) 3177 return 0; 3178 3179 nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); 3180 nm_i->nat_bits = f2fs_kvzalloc(sbi, 3181 F2FS_BLK_TO_BYTES(nm_i->nat_bits_blocks), GFP_KERNEL); 3182 if (!nm_i->nat_bits) 3183 return -ENOMEM; 3184 3185 nat_bits_addr = __start_cp_addr(sbi) + BLKS_PER_SEG(sbi) - 3186 nm_i->nat_bits_blocks; 3187 for (i = 0; i < nm_i->nat_bits_blocks; i++) { 3188 struct folio *folio; 3189 3190 folio = f2fs_get_meta_folio(sbi, nat_bits_addr++); 3191 if (IS_ERR(folio)) 3192 return PTR_ERR(folio); 3193 3194 memcpy(nm_i->nat_bits + F2FS_BLK_TO_BYTES(i), 3195 folio_address(folio), F2FS_BLKSIZE); 3196 f2fs_folio_put(folio, true); 3197 } 3198 3199 cp_ver |= (cur_cp_crc(ckpt) << 32); 3200 if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) { 3201 disable_nat_bits(sbi, true); 3202 return 0; 3203 } 3204 3205 nm_i->full_nat_bits = nm_i->nat_bits + 8; 3206 nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes; 3207 3208 f2fs_notice(sbi, "Found nat_bits in checkpoint"); 3209 return 0; 3210 } 3211 3212 static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) 3213 { 3214 struct f2fs_nm_info *nm_i = NM_I(sbi); 3215 unsigned int i = 0; 3216 nid_t nid, last_nid; 3217 3218 if (!enabled_nat_bits(sbi, NULL)) 3219 return; 3220 3221 for (i = 0; i < nm_i->nat_blocks; i++) { 3222 i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i); 3223 if (i >= nm_i->nat_blocks) 3224 break; 3225 3226 __set_bit_le(i, nm_i->nat_block_bitmap); 3227 3228 nid = i * NAT_ENTRY_PER_BLOCK; 3229 last_nid = nid + NAT_ENTRY_PER_BLOCK; 3230 3231 spin_lock(&NM_I(sbi)->nid_list_lock); 3232 for (; nid < last_nid; nid++) 3233 update_free_nid_bitmap(sbi, nid, true, true); 3234 spin_unlock(&NM_I(sbi)->nid_list_lock); 3235 } 3236 3237 for (i = 0; i < nm_i->nat_blocks; i++) { 3238 i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i); 3239 if (i >= nm_i->nat_blocks) 3240 break; 3241 3242 __set_bit_le(i, nm_i->nat_block_bitmap); 3243 } 3244 } 3245 3246 static int init_node_manager(struct f2fs_sb_info *sbi) 3247 { 3248 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); 3249 struct f2fs_nm_info *nm_i = NM_I(sbi); 3250 unsigned char *version_bitmap; 3251 unsigned int nat_segs; 3252 int err; 3253 3254 nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr); 3255 3256 /* segment_count_nat includes pair segment so divide to 2. 
*/ 3257 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; 3258 nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); 3259 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks; 3260 3261 /* not used nids: 0, node, meta, (and root counted as valid node) */ 3262 nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count - 3263 F2FS_RESERVED_NODE_NUM; 3264 nm_i->nid_cnt[FREE_NID] = 0; 3265 nm_i->nid_cnt[PREALLOC_NID] = 0; 3266 nm_i->ram_thresh = DEF_RAM_THRESHOLD; 3267 nm_i->ra_nid_pages = DEF_RA_NID_PAGES; 3268 nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; 3269 nm_i->max_rf_node_blocks = DEF_RF_NODE_BLOCKS; 3270 3271 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); 3272 INIT_LIST_HEAD(&nm_i->free_nid_list); 3273 INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); 3274 INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); 3275 INIT_LIST_HEAD(&nm_i->nat_entries); 3276 spin_lock_init(&nm_i->nat_list_lock); 3277 3278 mutex_init(&nm_i->build_lock); 3279 spin_lock_init(&nm_i->nid_list_lock); 3280 init_f2fs_rwsem(&nm_i->nat_tree_lock); 3281 3282 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); 3283 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); 3284 version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); 3285 nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, 3286 GFP_KERNEL); 3287 if (!nm_i->nat_bitmap) 3288 return -ENOMEM; 3289 3290 if (!test_opt(sbi, NAT_BITS)) 3291 disable_nat_bits(sbi, true); 3292 3293 err = __get_nat_bitmaps(sbi); 3294 if (err) 3295 return err; 3296 3297 #ifdef CONFIG_F2FS_CHECK_FS 3298 nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size, 3299 GFP_KERNEL); 3300 if (!nm_i->nat_bitmap_mir) 3301 return -ENOMEM; 3302 #endif 3303 3304 return 0; 3305 } 3306 3307 static int init_free_nid_cache(struct f2fs_sb_info *sbi) 3308 { 3309 struct f2fs_nm_info *nm_i = NM_I(sbi); 3310 int i; 3311 3312 nm_i->free_nid_bitmap = 3313 f2fs_kvzalloc(sbi, array_size(sizeof(unsigned char *), 3314 nm_i->nat_blocks), 3315 GFP_KERNEL); 3316 if (!nm_i->free_nid_bitmap) 3317 return -ENOMEM; 3318 3319 for (i = 0; i < nm_i->nat_blocks; i++) { 3320 nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi, 3321 f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL); 3322 if (!nm_i->free_nid_bitmap[i]) 3323 return -ENOMEM; 3324 } 3325 3326 nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8, 3327 GFP_KERNEL); 3328 if (!nm_i->nat_block_bitmap) 3329 return -ENOMEM; 3330 3331 nm_i->free_nid_count = 3332 f2fs_kvzalloc(sbi, array_size(sizeof(unsigned short), 3333 nm_i->nat_blocks), 3334 GFP_KERNEL); 3335 if (!nm_i->free_nid_count) 3336 return -ENOMEM; 3337 return 0; 3338 } 3339 3340 int f2fs_build_node_manager(struct f2fs_sb_info *sbi) 3341 { 3342 int err; 3343 3344 sbi->nm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_nm_info), 3345 GFP_KERNEL); 3346 if (!sbi->nm_info) 3347 return -ENOMEM; 3348 3349 err = init_node_manager(sbi); 3350 if (err) 3351 return err; 3352 3353 err = init_free_nid_cache(sbi); 3354 if (err) 3355 return err; 3356 3357 /* load free nid status from nat_bits table */ 3358 load_free_nid_bitmap(sbi); 3359 3360 return f2fs_build_free_nids(sbi, true, true); 3361 } 3362 3363 void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) 3364 { 3365 struct f2fs_nm_info *nm_i = NM_I(sbi); 3366 struct free_nid *i, *next_i; 3367 void *vec[NAT_VEC_SIZE]; 3368 struct nat_entry **natvec = (struct nat_entry **)vec; 3369 struct nat_entry_set **setvec = (struct nat_entry_set **)vec; 3370 nid_t nid = 0; 3371 unsigned int found; 3372 3373 if (!nm_i) 3374 
		return;

	/* destroy free nid list */
	spin_lock(&nm_i->nid_list_lock);
	list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
		__remove_free_nid(sbi, i, FREE_NID);
		spin_unlock(&nm_i->nid_list_lock);
		kmem_cache_free(free_nid_slab, i);
		spin_lock(&nm_i->nid_list_lock);
	}
	f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]);
	f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]);
	f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list));
	spin_unlock(&nm_i->nid_list_lock);

	/* destroy nat cache */
	f2fs_down_write(&nm_i->nat_tree_lock);
	while ((found = __gang_lookup_nat_cache(nm_i,
					nid, NAT_VEC_SIZE, natvec))) {
		unsigned idx;

		nid = nat_get_nid(natvec[found - 1]) + 1;
		for (idx = 0; idx < found; idx++) {
			spin_lock(&nm_i->nat_list_lock);
			list_del(&natvec[idx]->list);
			spin_unlock(&nm_i->nat_list_lock);

			__del_from_nat_cache(nm_i, natvec[idx]);
		}
	}
	f2fs_bug_on(sbi, nm_i->nat_cnt[TOTAL_NAT]);

	/* destroy nat set cache */
	nid = 0;
	memset(vec, 0, sizeof(void *) * NAT_VEC_SIZE);
	while ((found = __gang_lookup_nat_set(nm_i,
					nid, NAT_VEC_SIZE, setvec))) {
		unsigned idx;

		nid = setvec[found - 1]->set + 1;
		for (idx = 0; idx < found; idx++) {
			/* entry_cnt is not zero if cp_error has occurred */
			f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list));
			radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set);
			kmem_cache_free(nat_entry_set_slab, setvec[idx]);
		}
	}
	f2fs_up_write(&nm_i->nat_tree_lock);

	kvfree(nm_i->nat_block_bitmap);
	if (nm_i->free_nid_bitmap) {
		int i;

		for (i = 0; i < nm_i->nat_blocks; i++)
			kvfree(nm_i->free_nid_bitmap[i]);
		kvfree(nm_i->free_nid_bitmap);
	}
	kvfree(nm_i->free_nid_count);

	kfree(nm_i->nat_bitmap);
	kvfree(nm_i->nat_bits);
#ifdef CONFIG_F2FS_CHECK_FS
	kfree(nm_i->nat_bitmap_mir);
#endif
	sbi->nm_info = NULL;
	kfree(nm_i);
}

int __init f2fs_create_node_manager_caches(void)
{
	nat_entry_slab = f2fs_kmem_cache_create("f2fs_nat_entry",
			sizeof(struct nat_entry));
	if (!nat_entry_slab)
		goto fail;

	free_nid_slab = f2fs_kmem_cache_create("f2fs_free_nid",
			sizeof(struct free_nid));
	if (!free_nid_slab)
		goto destroy_nat_entry;

	nat_entry_set_slab = f2fs_kmem_cache_create("f2fs_nat_entry_set",
			sizeof(struct nat_entry_set));
	if (!nat_entry_set_slab)
		goto destroy_free_nid;

	fsync_node_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_node_entry",
			sizeof(struct fsync_node_entry));
	if (!fsync_node_entry_slab)
		goto destroy_nat_entry_set;
	return 0;

destroy_nat_entry_set:
	kmem_cache_destroy(nat_entry_set_slab);
destroy_free_nid:
	kmem_cache_destroy(free_nid_slab);
destroy_nat_entry:
	kmem_cache_destroy(nat_entry_slab);
fail:
	return -ENOMEM;
}

void f2fs_destroy_node_manager_caches(void)
{
	kmem_cache_destroy(fsync_node_entry_slab);
	kmem_cache_destroy(nat_entry_set_slab);
	kmem_cache_destroy(free_nid_slab);
	kmem_cache_destroy(nat_entry_slab);
}