1 /* 2 * fs/f2fs/node.c 3 * 4 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 5 * http://www.samsung.com/ 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 */ 11 #include <linux/fs.h> 12 #include <linux/f2fs_fs.h> 13 #include <linux/mpage.h> 14 #include <linux/backing-dev.h> 15 #include <linux/blkdev.h> 16 #include <linux/pagevec.h> 17 #include <linux/swap.h> 18 19 #include "f2fs.h" 20 #include "node.h" 21 #include "segment.h" 22 #include "trace.h" 23 #include <trace/events/f2fs.h> 24 25 #define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock) 26 27 static struct kmem_cache *nat_entry_slab; 28 static struct kmem_cache *free_nid_slab; 29 static struct kmem_cache *nat_entry_set_slab; 30 31 bool available_free_memory(struct f2fs_sb_info *sbi, int type) 32 { 33 struct f2fs_nm_info *nm_i = NM_I(sbi); 34 struct sysinfo val; 35 unsigned long avail_ram; 36 unsigned long mem_size = 0; 37 bool res = false; 38 39 si_meminfo(&val); 40 41 /* only uses low memory */ 42 avail_ram = val.totalram - val.totalhigh; 43 44 /* 45 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively 46 */ 47 if (type == FREE_NIDS) { 48 mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 49 PAGE_SHIFT; 50 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); 51 } else if (type == NAT_ENTRIES) { 52 mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 53 PAGE_SHIFT; 54 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); 55 if (excess_cached_nats(sbi)) 56 res = false; 57 } else if (type == DIRTY_DENTS) { 58 if (sbi->sb->s_bdi->wb.dirty_exceeded) 59 return false; 60 mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); 61 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 62 } else if (type == INO_ENTRIES) { 63 int i; 64 65 for (i = 0; i <= UPDATE_INO; i++) 66 mem_size += (sbi->im[i].ino_num * 67 sizeof(struct 
ino_entry)) >> PAGE_SHIFT; 68 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 69 } else if (type == EXTENT_CACHE) { 70 mem_size = (atomic_read(&sbi->total_ext_tree) * 71 sizeof(struct extent_tree) + 72 atomic_read(&sbi->total_ext_node) * 73 sizeof(struct extent_node)) >> PAGE_SHIFT; 74 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 75 } else { 76 if (!sbi->sb->s_bdi->wb.dirty_exceeded) 77 return true; 78 } 79 return res; 80 } 81 82 static void clear_node_page_dirty(struct page *page) 83 { 84 struct address_space *mapping = page->mapping; 85 unsigned int long flags; 86 87 if (PageDirty(page)) { 88 spin_lock_irqsave(&mapping->tree_lock, flags); 89 radix_tree_tag_clear(&mapping->page_tree, 90 page_index(page), 91 PAGECACHE_TAG_DIRTY); 92 spin_unlock_irqrestore(&mapping->tree_lock, flags); 93 94 clear_page_dirty_for_io(page); 95 dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); 96 } 97 ClearPageUptodate(page); 98 } 99 100 static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) 101 { 102 pgoff_t index = current_nat_addr(sbi, nid); 103 return get_meta_page(sbi, index); 104 } 105 106 static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) 107 { 108 struct page *src_page; 109 struct page *dst_page; 110 pgoff_t src_off; 111 pgoff_t dst_off; 112 void *src_addr; 113 void *dst_addr; 114 struct f2fs_nm_info *nm_i = NM_I(sbi); 115 116 src_off = current_nat_addr(sbi, nid); 117 dst_off = next_nat_addr(sbi, src_off); 118 119 /* get current nat block page with lock */ 120 src_page = get_meta_page(sbi, src_off); 121 dst_page = grab_meta_page(sbi, dst_off); 122 f2fs_bug_on(sbi, PageDirty(src_page)); 123 124 src_addr = page_address(src_page); 125 dst_addr = page_address(dst_page); 126 memcpy(dst_addr, src_addr, PAGE_SIZE); 127 set_page_dirty(dst_page); 128 f2fs_put_page(src_page, 1); 129 130 set_to_next_nat(nm_i, nid); 131 132 return dst_page; 133 } 134 135 static struct nat_entry *__lookup_nat_cache(struct 
f2fs_nm_info *nm_i, nid_t n) 136 { 137 return radix_tree_lookup(&nm_i->nat_root, n); 138 } 139 140 static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, 141 nid_t start, unsigned int nr, struct nat_entry **ep) 142 { 143 return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr); 144 } 145 146 static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) 147 { 148 list_del(&e->list); 149 radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); 150 nm_i->nat_cnt--; 151 kmem_cache_free(nat_entry_slab, e); 152 } 153 154 static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, 155 struct nat_entry *ne) 156 { 157 nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); 158 struct nat_entry_set *head; 159 160 if (get_nat_flag(ne, IS_DIRTY)) 161 return; 162 163 head = radix_tree_lookup(&nm_i->nat_set_root, set); 164 if (!head) { 165 head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS); 166 167 INIT_LIST_HEAD(&head->entry_list); 168 INIT_LIST_HEAD(&head->set_list); 169 head->set = set; 170 head->entry_cnt = 0; 171 f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); 172 } 173 list_move_tail(&ne->list, &head->entry_list); 174 nm_i->dirty_nat_cnt++; 175 head->entry_cnt++; 176 set_nat_flag(ne, IS_DIRTY, true); 177 } 178 179 static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, 180 struct nat_entry *ne) 181 { 182 nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); 183 struct nat_entry_set *head; 184 185 head = radix_tree_lookup(&nm_i->nat_set_root, set); 186 if (head) { 187 list_move_tail(&ne->list, &nm_i->nat_entries); 188 set_nat_flag(ne, IS_DIRTY, false); 189 head->entry_cnt--; 190 nm_i->dirty_nat_cnt--; 191 } 192 } 193 194 static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, 195 nid_t start, unsigned int nr, struct nat_entry_set **ep) 196 { 197 return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep, 198 start, nr); 199 } 200 201 int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) 202 { 203 struct 
f2fs_nm_info *nm_i = NM_I(sbi); 204 struct nat_entry *e; 205 bool need = false; 206 207 down_read(&nm_i->nat_tree_lock); 208 e = __lookup_nat_cache(nm_i, nid); 209 if (e) { 210 if (!get_nat_flag(e, IS_CHECKPOINTED) && 211 !get_nat_flag(e, HAS_FSYNCED_INODE)) 212 need = true; 213 } 214 up_read(&nm_i->nat_tree_lock); 215 return need; 216 } 217 218 bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) 219 { 220 struct f2fs_nm_info *nm_i = NM_I(sbi); 221 struct nat_entry *e; 222 bool is_cp = true; 223 224 down_read(&nm_i->nat_tree_lock); 225 e = __lookup_nat_cache(nm_i, nid); 226 if (e && !get_nat_flag(e, IS_CHECKPOINTED)) 227 is_cp = false; 228 up_read(&nm_i->nat_tree_lock); 229 return is_cp; 230 } 231 232 bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) 233 { 234 struct f2fs_nm_info *nm_i = NM_I(sbi); 235 struct nat_entry *e; 236 bool need_update = true; 237 238 down_read(&nm_i->nat_tree_lock); 239 e = __lookup_nat_cache(nm_i, ino); 240 if (e && get_nat_flag(e, HAS_LAST_FSYNC) && 241 (get_nat_flag(e, IS_CHECKPOINTED) || 242 get_nat_flag(e, HAS_FSYNCED_INODE))) 243 need_update = false; 244 up_read(&nm_i->nat_tree_lock); 245 return need_update; 246 } 247 248 static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) 249 { 250 struct nat_entry *new; 251 252 new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS); 253 f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); 254 memset(new, 0, sizeof(struct nat_entry)); 255 nat_set_nid(new, nid); 256 nat_reset_flag(new); 257 list_add_tail(&new->list, &nm_i->nat_entries); 258 nm_i->nat_cnt++; 259 return new; 260 } 261 262 static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, 263 struct f2fs_nat_entry *ne) 264 { 265 struct f2fs_nm_info *nm_i = NM_I(sbi); 266 struct nat_entry *e; 267 268 e = __lookup_nat_cache(nm_i, nid); 269 if (!e) { 270 e = grab_nat_entry(nm_i, nid); 271 node_info_from_raw_nat(&e->ni, ne); 272 } else { 273 f2fs_bug_on(sbi, nat_get_ino(e) != ne->ino || 274 
nat_get_blkaddr(e) != ne->block_addr || 275 nat_get_version(e) != ne->version); 276 } 277 } 278 279 static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, 280 block_t new_blkaddr, bool fsync_done) 281 { 282 struct f2fs_nm_info *nm_i = NM_I(sbi); 283 struct nat_entry *e; 284 285 down_write(&nm_i->nat_tree_lock); 286 e = __lookup_nat_cache(nm_i, ni->nid); 287 if (!e) { 288 e = grab_nat_entry(nm_i, ni->nid); 289 copy_node_info(&e->ni, ni); 290 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); 291 } else if (new_blkaddr == NEW_ADDR) { 292 /* 293 * when nid is reallocated, 294 * previous nat entry can be remained in nat cache. 295 * So, reinitialize it with new information. 296 */ 297 copy_node_info(&e->ni, ni); 298 f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); 299 } 300 301 /* sanity check */ 302 f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr); 303 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR && 304 new_blkaddr == NULL_ADDR); 305 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && 306 new_blkaddr == NEW_ADDR); 307 f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR && 308 nat_get_blkaddr(e) != NULL_ADDR && 309 new_blkaddr == NEW_ADDR); 310 311 /* increment version no as node is removed */ 312 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { 313 unsigned char version = nat_get_version(e); 314 nat_set_version(e, inc_node_version(version)); 315 316 /* in order to reuse the nid */ 317 if (nm_i->next_scan_nid > ni->nid) 318 nm_i->next_scan_nid = ni->nid; 319 } 320 321 /* change address */ 322 nat_set_blkaddr(e, new_blkaddr); 323 if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR) 324 set_nat_flag(e, IS_CHECKPOINTED, false); 325 __set_nat_cache_dirty(nm_i, e); 326 327 /* update fsync_mark if its inode nat entry is still alive */ 328 if (ni->nid != ni->ino) 329 e = __lookup_nat_cache(nm_i, ni->ino); 330 if (e) { 331 if (fsync_done && ni->nid == ni->ino) 332 set_nat_flag(e, HAS_FSYNCED_INODE, true); 333 set_nat_flag(e, HAS_LAST_FSYNC, 
fsync_done); 334 } 335 up_write(&nm_i->nat_tree_lock); 336 } 337 338 int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) 339 { 340 struct f2fs_nm_info *nm_i = NM_I(sbi); 341 int nr = nr_shrink; 342 343 if (!down_write_trylock(&nm_i->nat_tree_lock)) 344 return 0; 345 346 while (nr_shrink && !list_empty(&nm_i->nat_entries)) { 347 struct nat_entry *ne; 348 ne = list_first_entry(&nm_i->nat_entries, 349 struct nat_entry, list); 350 __del_from_nat_cache(nm_i, ne); 351 nr_shrink--; 352 } 353 up_write(&nm_i->nat_tree_lock); 354 return nr - nr_shrink; 355 } 356 357 /* 358 * This function always returns success 359 */ 360 void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) 361 { 362 struct f2fs_nm_info *nm_i = NM_I(sbi); 363 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 364 struct f2fs_journal *journal = curseg->journal; 365 nid_t start_nid = START_NID(nid); 366 struct f2fs_nat_block *nat_blk; 367 struct page *page = NULL; 368 struct f2fs_nat_entry ne; 369 struct nat_entry *e; 370 int i; 371 372 ni->nid = nid; 373 374 /* Check nat cache */ 375 down_read(&nm_i->nat_tree_lock); 376 e = __lookup_nat_cache(nm_i, nid); 377 if (e) { 378 ni->ino = nat_get_ino(e); 379 ni->blk_addr = nat_get_blkaddr(e); 380 ni->version = nat_get_version(e); 381 up_read(&nm_i->nat_tree_lock); 382 return; 383 } 384 385 memset(&ne, 0, sizeof(struct f2fs_nat_entry)); 386 387 /* Check current segment summary */ 388 down_read(&curseg->journal_rwsem); 389 i = lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0); 390 if (i >= 0) { 391 ne = nat_in_journal(journal, i); 392 node_info_from_raw_nat(ni, &ne); 393 } 394 up_read(&curseg->journal_rwsem); 395 if (i >= 0) 396 goto cache; 397 398 /* Fill node_info from nat page */ 399 page = get_current_nat_page(sbi, start_nid); 400 nat_blk = (struct f2fs_nat_block *)page_address(page); 401 ne = nat_blk->entries[nid - start_nid]; 402 node_info_from_raw_nat(ni, &ne); 403 f2fs_put_page(page, 1); 404 cache: 405 
up_read(&nm_i->nat_tree_lock); 406 /* cache nat entry */ 407 down_write(&nm_i->nat_tree_lock); 408 cache_nat_entry(sbi, nid, &ne); 409 up_write(&nm_i->nat_tree_lock); 410 } 411 412 /* 413 * readahead MAX_RA_NODE number of node pages. 414 */ 415 static void ra_node_pages(struct page *parent, int start, int n) 416 { 417 struct f2fs_sb_info *sbi = F2FS_P_SB(parent); 418 struct blk_plug plug; 419 int i, end; 420 nid_t nid; 421 422 blk_start_plug(&plug); 423 424 /* Then, try readahead for siblings of the desired node */ 425 end = start + n; 426 end = min(end, NIDS_PER_BLOCK); 427 for (i = start; i < end; i++) { 428 nid = get_nid(parent, i, false); 429 ra_node_page(sbi, nid); 430 } 431 432 blk_finish_plug(&plug); 433 } 434 435 pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs) 436 { 437 const long direct_index = ADDRS_PER_INODE(dn->inode); 438 const long direct_blks = ADDRS_PER_BLOCK; 439 const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; 440 unsigned int skipped_unit = ADDRS_PER_BLOCK; 441 int cur_level = dn->cur_level; 442 int max_level = dn->max_level; 443 pgoff_t base = 0; 444 445 if (!dn->max_level) 446 return pgofs + 1; 447 448 while (max_level-- > cur_level) 449 skipped_unit *= NIDS_PER_BLOCK; 450 451 switch (dn->max_level) { 452 case 3: 453 base += 2 * indirect_blks; 454 case 2: 455 base += 2 * direct_blks; 456 case 1: 457 base += direct_index; 458 break; 459 default: 460 f2fs_bug_on(F2FS_I_SB(dn->inode), 1); 461 } 462 463 return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base; 464 } 465 466 /* 467 * The maximum depth is four. 468 * Offset[0] will have raw inode offset. 
469 */ 470 static int get_node_path(struct inode *inode, long block, 471 int offset[4], unsigned int noffset[4]) 472 { 473 const long direct_index = ADDRS_PER_INODE(inode); 474 const long direct_blks = ADDRS_PER_BLOCK; 475 const long dptrs_per_blk = NIDS_PER_BLOCK; 476 const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; 477 const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK; 478 int n = 0; 479 int level = 0; 480 481 noffset[0] = 0; 482 483 if (block < direct_index) { 484 offset[n] = block; 485 goto got; 486 } 487 block -= direct_index; 488 if (block < direct_blks) { 489 offset[n++] = NODE_DIR1_BLOCK; 490 noffset[n] = 1; 491 offset[n] = block; 492 level = 1; 493 goto got; 494 } 495 block -= direct_blks; 496 if (block < direct_blks) { 497 offset[n++] = NODE_DIR2_BLOCK; 498 noffset[n] = 2; 499 offset[n] = block; 500 level = 1; 501 goto got; 502 } 503 block -= direct_blks; 504 if (block < indirect_blks) { 505 offset[n++] = NODE_IND1_BLOCK; 506 noffset[n] = 3; 507 offset[n++] = block / direct_blks; 508 noffset[n] = 4 + offset[n - 1]; 509 offset[n] = block % direct_blks; 510 level = 2; 511 goto got; 512 } 513 block -= indirect_blks; 514 if (block < indirect_blks) { 515 offset[n++] = NODE_IND2_BLOCK; 516 noffset[n] = 4 + dptrs_per_blk; 517 offset[n++] = block / direct_blks; 518 noffset[n] = 5 + dptrs_per_blk + offset[n - 1]; 519 offset[n] = block % direct_blks; 520 level = 2; 521 goto got; 522 } 523 block -= indirect_blks; 524 if (block < dindirect_blks) { 525 offset[n++] = NODE_DIND_BLOCK; 526 noffset[n] = 5 + (dptrs_per_blk * 2); 527 offset[n++] = block / indirect_blks; 528 noffset[n] = 6 + (dptrs_per_blk * 2) + 529 offset[n - 1] * (dptrs_per_blk + 1); 530 offset[n++] = (block / direct_blks) % dptrs_per_blk; 531 noffset[n] = 7 + (dptrs_per_blk * 2) + 532 offset[n - 2] * (dptrs_per_blk + 1) + 533 offset[n - 1]; 534 offset[n] = block % direct_blks; 535 level = 3; 536 goto got; 537 } else { 538 BUG(); 539 } 540 got: 541 return level; 542 } 543 544 /* 545 * 
Caller should call f2fs_put_dnode(dn). 546 * Also, it should grab and release a rwsem by calling f2fs_lock_op() and 547 * f2fs_unlock_op() only if ro is not set RDONLY_NODE. 548 * In the case of RDONLY_NODE, we don't need to care about mutex. 549 */ 550 int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) 551 { 552 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 553 struct page *npage[4]; 554 struct page *parent = NULL; 555 int offset[4]; 556 unsigned int noffset[4]; 557 nid_t nids[4]; 558 int level, i = 0; 559 int err = 0; 560 561 level = get_node_path(dn->inode, index, offset, noffset); 562 563 nids[0] = dn->inode->i_ino; 564 npage[0] = dn->inode_page; 565 566 if (!npage[0]) { 567 npage[0] = get_node_page(sbi, nids[0]); 568 if (IS_ERR(npage[0])) 569 return PTR_ERR(npage[0]); 570 } 571 572 /* if inline_data is set, should not report any block indices */ 573 if (f2fs_has_inline_data(dn->inode) && index) { 574 err = -ENOENT; 575 f2fs_put_page(npage[0], 1); 576 goto release_out; 577 } 578 579 parent = npage[0]; 580 if (level != 0) 581 nids[1] = get_nid(parent, offset[0], true); 582 dn->inode_page = npage[0]; 583 dn->inode_page_locked = true; 584 585 /* get indirect or direct nodes */ 586 for (i = 1; i <= level; i++) { 587 bool done = false; 588 589 if (!nids[i] && mode == ALLOC_NODE) { 590 /* alloc new node */ 591 if (!alloc_nid(sbi, &(nids[i]))) { 592 err = -ENOSPC; 593 goto release_pages; 594 } 595 596 dn->nid = nids[i]; 597 npage[i] = new_node_page(dn, noffset[i], NULL); 598 if (IS_ERR(npage[i])) { 599 alloc_nid_failed(sbi, nids[i]); 600 err = PTR_ERR(npage[i]); 601 goto release_pages; 602 } 603 604 set_nid(parent, offset[i - 1], nids[i], i == 1); 605 alloc_nid_done(sbi, nids[i]); 606 done = true; 607 } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { 608 npage[i] = get_node_page_ra(parent, offset[i - 1]); 609 if (IS_ERR(npage[i])) { 610 err = PTR_ERR(npage[i]); 611 goto release_pages; 612 } 613 done = true; 614 } 615 if (i == 1) 
{ 616 dn->inode_page_locked = false; 617 unlock_page(parent); 618 } else { 619 f2fs_put_page(parent, 1); 620 } 621 622 if (!done) { 623 npage[i] = get_node_page(sbi, nids[i]); 624 if (IS_ERR(npage[i])) { 625 err = PTR_ERR(npage[i]); 626 f2fs_put_page(npage[0], 0); 627 goto release_out; 628 } 629 } 630 if (i < level) { 631 parent = npage[i]; 632 nids[i + 1] = get_nid(parent, offset[i], false); 633 } 634 } 635 dn->nid = nids[level]; 636 dn->ofs_in_node = offset[level]; 637 dn->node_page = npage[level]; 638 dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); 639 return 0; 640 641 release_pages: 642 f2fs_put_page(parent, 1); 643 if (i > 1) 644 f2fs_put_page(npage[0], 0); 645 release_out: 646 dn->inode_page = NULL; 647 dn->node_page = NULL; 648 if (err == -ENOENT) { 649 dn->cur_level = i; 650 dn->max_level = level; 651 dn->ofs_in_node = offset[level]; 652 } 653 return err; 654 } 655 656 static void truncate_node(struct dnode_of_data *dn) 657 { 658 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 659 struct node_info ni; 660 661 get_node_info(sbi, dn->nid, &ni); 662 if (dn->inode->i_blocks == 0) { 663 f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR); 664 goto invalidate; 665 } 666 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); 667 668 /* Deallocate node address */ 669 invalidate_blocks(sbi, ni.blk_addr); 670 dec_valid_node_count(sbi, dn->inode); 671 set_node_addr(sbi, &ni, NULL_ADDR, false); 672 673 if (dn->nid == dn->inode->i_ino) { 674 remove_orphan_inode(sbi, dn->nid); 675 dec_valid_inode_count(sbi); 676 f2fs_inode_synced(dn->inode); 677 } 678 invalidate: 679 clear_node_page_dirty(dn->node_page); 680 set_sbi_flag(sbi, SBI_IS_DIRTY); 681 682 f2fs_put_page(dn->node_page, 1); 683 684 invalidate_mapping_pages(NODE_MAPPING(sbi), 685 dn->node_page->index, dn->node_page->index); 686 687 dn->node_page = NULL; 688 trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); 689 } 690 691 static int truncate_dnode(struct dnode_of_data *dn) 692 { 693 struct page *page; 694 
695 if (dn->nid == 0) 696 return 1; 697 698 /* get direct node */ 699 page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); 700 if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) 701 return 1; 702 else if (IS_ERR(page)) 703 return PTR_ERR(page); 704 705 /* Make dnode_of_data for parameter */ 706 dn->node_page = page; 707 dn->ofs_in_node = 0; 708 truncate_data_blocks(dn); 709 truncate_node(dn); 710 return 1; 711 } 712 713 static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, 714 int ofs, int depth) 715 { 716 struct dnode_of_data rdn = *dn; 717 struct page *page; 718 struct f2fs_node *rn; 719 nid_t child_nid; 720 unsigned int child_nofs; 721 int freed = 0; 722 int i, ret; 723 724 if (dn->nid == 0) 725 return NIDS_PER_BLOCK + 1; 726 727 trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); 728 729 page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); 730 if (IS_ERR(page)) { 731 trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); 732 return PTR_ERR(page); 733 } 734 735 ra_node_pages(page, ofs, NIDS_PER_BLOCK); 736 737 rn = F2FS_NODE(page); 738 if (depth < 3) { 739 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { 740 child_nid = le32_to_cpu(rn->in.nid[i]); 741 if (child_nid == 0) 742 continue; 743 rdn.nid = child_nid; 744 ret = truncate_dnode(&rdn); 745 if (ret < 0) 746 goto out_err; 747 if (set_nid(page, i, 0, false)) 748 dn->node_changed = true; 749 } 750 } else { 751 child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1; 752 for (i = ofs; i < NIDS_PER_BLOCK; i++) { 753 child_nid = le32_to_cpu(rn->in.nid[i]); 754 if (child_nid == 0) { 755 child_nofs += NIDS_PER_BLOCK + 1; 756 continue; 757 } 758 rdn.nid = child_nid; 759 ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); 760 if (ret == (NIDS_PER_BLOCK + 1)) { 761 if (set_nid(page, i, 0, false)) 762 dn->node_changed = true; 763 child_nofs += ret; 764 } else if (ret < 0 && ret != -ENOENT) { 765 goto out_err; 766 } 767 } 768 freed = child_nofs; 769 } 770 771 if (!ofs) { 772 /* remove 
current indirect node */ 773 dn->node_page = page; 774 truncate_node(dn); 775 freed++; 776 } else { 777 f2fs_put_page(page, 1); 778 } 779 trace_f2fs_truncate_nodes_exit(dn->inode, freed); 780 return freed; 781 782 out_err: 783 f2fs_put_page(page, 1); 784 trace_f2fs_truncate_nodes_exit(dn->inode, ret); 785 return ret; 786 } 787 788 static int truncate_partial_nodes(struct dnode_of_data *dn, 789 struct f2fs_inode *ri, int *offset, int depth) 790 { 791 struct page *pages[2]; 792 nid_t nid[3]; 793 nid_t child_nid; 794 int err = 0; 795 int i; 796 int idx = depth - 2; 797 798 nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); 799 if (!nid[0]) 800 return 0; 801 802 /* get indirect nodes in the path */ 803 for (i = 0; i < idx + 1; i++) { 804 /* reference count'll be increased */ 805 pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]); 806 if (IS_ERR(pages[i])) { 807 err = PTR_ERR(pages[i]); 808 idx = i - 1; 809 goto fail; 810 } 811 nid[i + 1] = get_nid(pages[i], offset[i + 1], false); 812 } 813 814 ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK); 815 816 /* free direct nodes linked to a partial indirect node */ 817 for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { 818 child_nid = get_nid(pages[idx], i, false); 819 if (!child_nid) 820 continue; 821 dn->nid = child_nid; 822 err = truncate_dnode(dn); 823 if (err < 0) 824 goto fail; 825 if (set_nid(pages[idx], i, 0, false)) 826 dn->node_changed = true; 827 } 828 829 if (offset[idx + 1] == 0) { 830 dn->node_page = pages[idx]; 831 dn->nid = nid[idx]; 832 truncate_node(dn); 833 } else { 834 f2fs_put_page(pages[idx], 1); 835 } 836 offset[idx]++; 837 offset[idx + 1] = 0; 838 idx--; 839 fail: 840 for (i = idx; i >= 0; i--) 841 f2fs_put_page(pages[i], 1); 842 843 trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); 844 845 return err; 846 } 847 848 /* 849 * All the block addresses of data and nodes should be nullified. 
850 */ 851 int truncate_inode_blocks(struct inode *inode, pgoff_t from) 852 { 853 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 854 int err = 0, cont = 1; 855 int level, offset[4], noffset[4]; 856 unsigned int nofs = 0; 857 struct f2fs_inode *ri; 858 struct dnode_of_data dn; 859 struct page *page; 860 861 trace_f2fs_truncate_inode_blocks_enter(inode, from); 862 863 level = get_node_path(inode, from, offset, noffset); 864 865 page = get_node_page(sbi, inode->i_ino); 866 if (IS_ERR(page)) { 867 trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); 868 return PTR_ERR(page); 869 } 870 871 set_new_dnode(&dn, inode, page, NULL, 0); 872 unlock_page(page); 873 874 ri = F2FS_INODE(page); 875 switch (level) { 876 case 0: 877 case 1: 878 nofs = noffset[1]; 879 break; 880 case 2: 881 nofs = noffset[1]; 882 if (!offset[level - 1]) 883 goto skip_partial; 884 err = truncate_partial_nodes(&dn, ri, offset, level); 885 if (err < 0 && err != -ENOENT) 886 goto fail; 887 nofs += 1 + NIDS_PER_BLOCK; 888 break; 889 case 3: 890 nofs = 5 + 2 * NIDS_PER_BLOCK; 891 if (!offset[level - 1]) 892 goto skip_partial; 893 err = truncate_partial_nodes(&dn, ri, offset, level); 894 if (err < 0 && err != -ENOENT) 895 goto fail; 896 break; 897 default: 898 BUG(); 899 } 900 901 skip_partial: 902 while (cont) { 903 dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); 904 switch (offset[0]) { 905 case NODE_DIR1_BLOCK: 906 case NODE_DIR2_BLOCK: 907 err = truncate_dnode(&dn); 908 break; 909 910 case NODE_IND1_BLOCK: 911 case NODE_IND2_BLOCK: 912 err = truncate_nodes(&dn, nofs, offset[1], 2); 913 break; 914 915 case NODE_DIND_BLOCK: 916 err = truncate_nodes(&dn, nofs, offset[1], 3); 917 cont = 0; 918 break; 919 920 default: 921 BUG(); 922 } 923 if (err < 0 && err != -ENOENT) 924 goto fail; 925 if (offset[1] == 0 && 926 ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { 927 lock_page(page); 928 BUG_ON(page->mapping != NODE_MAPPING(sbi)); 929 f2fs_wait_on_page_writeback(page, NODE, true); 930 
ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; 931 set_page_dirty(page); 932 unlock_page(page); 933 } 934 offset[1] = 0; 935 offset[0]++; 936 nofs += err; 937 } 938 fail: 939 f2fs_put_page(page, 0); 940 trace_f2fs_truncate_inode_blocks_exit(inode, err); 941 return err > 0 ? 0 : err; 942 } 943 944 int truncate_xattr_node(struct inode *inode, struct page *page) 945 { 946 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 947 nid_t nid = F2FS_I(inode)->i_xattr_nid; 948 struct dnode_of_data dn; 949 struct page *npage; 950 951 if (!nid) 952 return 0; 953 954 npage = get_node_page(sbi, nid); 955 if (IS_ERR(npage)) 956 return PTR_ERR(npage); 957 958 f2fs_i_xnid_write(inode, 0); 959 960 /* need to do checkpoint during fsync */ 961 F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); 962 963 set_new_dnode(&dn, inode, page, npage, nid); 964 965 if (page) 966 dn.inode_page_locked = true; 967 truncate_node(&dn); 968 return 0; 969 } 970 971 /* 972 * Caller should grab and release a rwsem by calling f2fs_lock_op() and 973 * f2fs_unlock_op(). 
*/
int remove_inode_page(struct inode *inode)
{
	struct dnode_of_data dn;
	int err;

	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
	err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
	if (err)
		return err;

	err = truncate_xattr_node(inode, dn.inode_page);
	if (err) {
		f2fs_put_dnode(&dn);
		return err;
	}

	/* remove potential inline_data blocks */
	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
				S_ISLNK(inode->i_mode))
		truncate_data_blocks_range(&dn, 1);

	/* 0 is possible, after f2fs_new_inode() has failed */
	f2fs_bug_on(F2FS_I_SB(inode),
			inode->i_blocks != 0 && inode->i_blocks != 1);

	/* will put inode & node pages */
	truncate_node(&dn);
	return 0;
}

/*
 * Allocate the node page for a new inode (node offset 0, nid equal to the
 * inode number).  Returns the page or an ERR_PTR from new_node_page().
 */
struct page *new_inode_page(struct inode *inode)
{
	struct dnode_of_data dn;

	/* allocate inode page for new inode */
	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);

	/* caller should f2fs_put_page(page, 1); */
	return new_node_page(&dn, 0, NULL);
}

/*
 * Create the node page for dn->nid at node offset @ofs: grab the cache
 * page, account the node, set its NAT address to NEW_ADDR, fill the
 * footer and mark the page uptodate and dirty.
 *
 * Returns the page, or ERR_PTR(-EPERM) when FI_NO_ALLOC is set on the
 * inode, ERR_PTR(-ENOMEM) when no cache page is available, or
 * ERR_PTR(-ENOSPC) when the node cannot be accounted.
 * NOTE(review): @ipage is not referenced in this body.
 */
struct page *new_node_page(struct dnode_of_data *dn,
				unsigned int ofs, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct node_info old_ni, new_ni;
	struct page *page;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return ERR_PTR(-EPERM);

	page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
		err = -ENOSPC;
		goto fail;
	}

	get_node_info(sbi, dn->nid, &old_ni);

	/* Reinitialize old_ni with new node page */
	f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
	new_ni = old_ni;
	new_ni.ino = dn->inode->i_ino;
	set_node_addr(sbi, &new_ni, NEW_ADDR, false);

	f2fs_wait_on_page_writeback(page, NODE, true);
	fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
	set_cold_node(dn->inode, page);
	if (!PageUptodate(page))
		SetPageUptodate(page);
	if (set_page_dirty(page))
		dn->node_changed = true;

	if (f2fs_has_xattr_block(ofs))
		f2fs_i_xnid_write(dn->inode, dn->nid);

	/* node offset 0 is the inode block itself */
	if (ofs == 0)
		inc_valid_inode_count(sbi);
	return page;

fail:
	clear_node_page_dirty(page);
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

/*
 * Caller should do after getting the following values.
 * 0: f2fs_put_page(page, 0)
 * LOCKED_PAGE or error: f2fs_put_page(page, 1)
 *
 * Returns LOCKED_PAGE without issuing I/O when @page is already uptodate,
 * -ENOENT when the NAT has no block address for the node, otherwise the
 * result of submitting the read bio.
 */
static int read_node_page(struct page *page, int op_flags)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
	struct node_info ni;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = NODE,
		.op = REQ_OP_READ,
		.op_flags = op_flags,
		.page = page,
		.encrypted_page = NULL,
	};

	if (PageUptodate(page))
		return LOCKED_PAGE;

	/* the page index in the node mapping is the nid */
	get_node_info(sbi, page->index, &ni);

	if (unlikely(ni.blk_addr == NULL_ADDR)) {
		ClearPageUptodate(page);
		return -ENOENT;
	}

	fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
	return f2fs_submit_page_bio(&fio);
}

/*
 * Readahead a node page
 *
 * Does nothing for nid 0 or when a page for @nid is already present in
 * the node mapping.
 */
void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct page *apage;
	int err;

	if (!nid)
		return;
	f2fs_bug_on(sbi, check_nid_range(sbi, nid));

	rcu_read_lock();
	apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid);
	rcu_read_unlock();
	if (apage)
		return;

	apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
	if (!apage)
		return;

	err = read_node_page(apage, REQ_RAHEAD);
	/* unlock only when read_node_page() left the page locked */
	f2fs_put_page(apage, err ? 1 : 0);
}

/*
 * Get the locked, uptodate node page for @nid.  When @parent is given,
 * readahead is issued for the sibling nids following slot @start once the
 * read of @nid has been submitted.
 *
 * Returns the page, or ERR_PTR(-ENOENT) for nid 0, ERR_PTR(-ENOMEM) when
 * no cache page is available, the error from read_node_page(), or
 * ERR_PTR(-EIO) when the page is not uptodate or its footer nid differs.
 */
static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
					struct page *parent, int start)
{
	struct page *page;
	int err;

	if (!nid)
		return ERR_PTR(-ENOENT);
	f2fs_bug_on(sbi, check_nid_range(sbi, nid));
repeat:
	page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
	if (!page)
		return ERR_PTR(-ENOMEM);

	err = read_node_page(page, READ_SYNC);
	if (err < 0) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	} else if (err == LOCKED_PAGE) {
		goto page_hit;
	}

	if (parent)
		ra_node_pages(parent, start + 1, MAX_RA_NODE);

	lock_page(page);

	/* the page left the node mapping while we waited: start over */
	if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
		f2fs_put_page(page, 1);
		goto repeat;
	}

	if (unlikely(!PageUptodate(page)))
		goto out_err;
page_hit:
	if(unlikely(nid != nid_of_node(page))) {
		f2fs_bug_on(sbi, 1);
		ClearPageUptodate(page);
out_err:
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	return page;
}

/* Get the node page for @nid without sibling readahead. */
struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
{
	return __get_node_page(sbi, nid, NULL, 0);
}

/*
 * Get the node page whose nid is stored at slot @start of @parent, with
 * readahead of the following siblings.
 */
struct page *get_node_page_ra(struct page *parent, int start)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
	nid_t nid = get_nid(parent, start, false);

	return __get_node_page(sbi, nid, parent, start);
}

/*
 * Write back the dirty inline data held in page 0 of inode @ino, if the
 * inode is found and the page can be locked without waiting.
 */
static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode;
	struct page *page;
	int ret;

	/* should flush inline_data before evict_inode */
	inode = ilookup(sbi->sb, ino);
	if (!inode)
		return;

	page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0);
	if (!page)
		goto iput_out;

	if (!PageUptodate(page))
		goto page_out;

	if (!PageDirty(page))
		goto page_out;

	if (!clear_page_dirty_for_io(page))
		goto page_out;

	ret = f2fs_write_inline_data(inode, page);
	inode_dec_dirty_pages(inode);
	/* writing failed: keep the page dirty */
	if (ret)
		set_page_dirty(page);
page_out:
	f2fs_put_page(page, 1);
iput_out:
	iput(inode);
}

/*
 * Move a node page for garbage collection.  With FG_GC the page is
 * dirtied and written through ->writepage() with WB_SYNC_ALL; otherwise
 * it is only marked dirty unless it is under writeback.  The reference on
 * @node_page is dropped in all cases.
 */
void move_node_page(struct page *node_page, int gc_type)
{
	if (gc_type == FG_GC) {
		struct f2fs_sb_info *sbi = F2FS_P_SB(node_page);
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_ALL,
			.nr_to_write = 1,
			.for_reclaim = 0,
		};

		set_page_dirty(node_page);
		f2fs_wait_on_page_writeback(node_page, NODE, true);

		f2fs_bug_on(sbi, PageWriteback(node_page));
		if (!clear_page_dirty_for_io(node_page))
			goto out_page;

		/* unlock here only when ->writepage() reports failure */
		if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc))
			unlock_page(node_page);
		goto release_page;
	} else {
		/* set page dirty and write it */
		if (!PageWriteback(node_page))
			set_page_dirty(node_page);
	}
out_page:
	unlock_page(node_page);
release_page:
	f2fs_put_page(node_page, 0);
}

/*
 * Scan the dirty pages of the node mapping and return, with a reference
 * held, the last dirty cold dnode page that belongs to @ino.  Returns
 * NULL when there is none and ERR_PTR(-EIO) on checkpoint error.
 */
static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
{
	pgoff_t index, end;
	struct pagevec pvec;
	struct page *last_page = NULL;

	pagevec_init(&pvec, 0);
	index = 0;
	end = ULONG_MAX;

	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (unlikely(f2fs_cp_error(sbi))) {
				f2fs_put_page(last_page, 0);
				pagevec_release(&pvec);
				return ERR_PTR(-EIO);
			}

			if (!IS_DNODE(page) || !is_cold_node(page))
				continue;
			if (ino_of_node(page) != ino)
				continue;

			lock_page(page);

			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			/* re-check under the page lock */
			if (ino_of_node(page) != ino)
				goto continue_unlock;

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			/* a later candidate replaces the previous one */
			if (last_page)
				f2fs_put_page(last_page, 0);

			get_page(page);
			last_page = page;
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	return last_page;
}

int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
			struct writeback_control *wbc, bool atomic)
{
	pgoff_t index, end;
	struct pagevec pvec;
	int ret = 0;
	struct page *last_page = NULL;
	bool marked = false;
	nid_t ino = inode->i_ino;
	int nwritten = 0;

	if (atomic) {
		last_page = last_fsync_dnode(sbi, ino);
		if (IS_ERR_OR_NULL(last_page))
			return PTR_ERR_OR_ZERO(last_page);
	}
retry:
	pagevec_init(&pvec, 0);
	index = 0;
	end = ULONG_MAX;

	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (unlikely(f2fs_cp_error(sbi))) {
				f2fs_put_page(last_page, 0);
				pagevec_release(&pvec);
				return -EIO;
			}

			if (!IS_DNODE(page) || !is_cold_node(page))
				continue;
			if (ino_of_node(page) != ino)
				continue;

			lock_page(page);

			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (ino_of_node(page) != ino)
				goto continue_unlock;

			if (!PageDirty(page) && page != last_page) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

f2fs_wait_on_page_writeback(page, NODE, true); 1365 BUG_ON(PageWriteback(page)); 1366 1367 if (!atomic || page == last_page) { 1368 set_fsync_mark(page, 1); 1369 if (IS_INODE(page)) { 1370 if (is_inode_flag_set(inode, 1371 FI_DIRTY_INODE)) 1372 update_inode(inode, page); 1373 set_dentry_mark(page, 1374 need_dentry_mark(sbi, ino)); 1375 } 1376 /* may be written by other thread */ 1377 if (!PageDirty(page)) 1378 set_page_dirty(page); 1379 } 1380 1381 if (!clear_page_dirty_for_io(page)) 1382 goto continue_unlock; 1383 1384 ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); 1385 if (ret) { 1386 unlock_page(page); 1387 f2fs_put_page(last_page, 0); 1388 break; 1389 } else { 1390 nwritten++; 1391 } 1392 1393 if (page == last_page) { 1394 f2fs_put_page(page, 0); 1395 marked = true; 1396 break; 1397 } 1398 } 1399 pagevec_release(&pvec); 1400 cond_resched(); 1401 1402 if (ret || marked) 1403 break; 1404 } 1405 if (!ret && atomic && !marked) { 1406 f2fs_msg(sbi->sb, KERN_DEBUG, 1407 "Retry to write fsync mark: ino=%u, idx=%lx", 1408 ino, last_page->index); 1409 lock_page(last_page); 1410 set_page_dirty(last_page); 1411 unlock_page(last_page); 1412 goto retry; 1413 } 1414 1415 if (nwritten) 1416 f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE); 1417 return ret ? 
-EIO: 0; 1418 } 1419 1420 int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc) 1421 { 1422 pgoff_t index, end; 1423 struct pagevec pvec; 1424 int step = 0; 1425 int nwritten = 0; 1426 int ret = 0; 1427 1428 pagevec_init(&pvec, 0); 1429 1430 next_step: 1431 index = 0; 1432 end = ULONG_MAX; 1433 1434 while (index <= end) { 1435 int i, nr_pages; 1436 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, 1437 PAGECACHE_TAG_DIRTY, 1438 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 1439 if (nr_pages == 0) 1440 break; 1441 1442 for (i = 0; i < nr_pages; i++) { 1443 struct page *page = pvec.pages[i]; 1444 1445 if (unlikely(f2fs_cp_error(sbi))) { 1446 pagevec_release(&pvec); 1447 ret = -EIO; 1448 goto out; 1449 } 1450 1451 /* 1452 * flushing sequence with step: 1453 * 0. indirect nodes 1454 * 1. dentry dnodes 1455 * 2. file dnodes 1456 */ 1457 if (step == 0 && IS_DNODE(page)) 1458 continue; 1459 if (step == 1 && (!IS_DNODE(page) || 1460 is_cold_node(page))) 1461 continue; 1462 if (step == 2 && (!IS_DNODE(page) || 1463 !is_cold_node(page))) 1464 continue; 1465 lock_node: 1466 if (!trylock_page(page)) 1467 continue; 1468 1469 if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 1470 continue_unlock: 1471 unlock_page(page); 1472 continue; 1473 } 1474 1475 if (!PageDirty(page)) { 1476 /* someone wrote it for us */ 1477 goto continue_unlock; 1478 } 1479 1480 /* flush inline_data */ 1481 if (is_inline_node(page)) { 1482 clear_inline_node(page); 1483 unlock_page(page); 1484 flush_inline_data(sbi, ino_of_node(page)); 1485 goto lock_node; 1486 } 1487 1488 f2fs_wait_on_page_writeback(page, NODE, true); 1489 1490 BUG_ON(PageWriteback(page)); 1491 if (!clear_page_dirty_for_io(page)) 1492 goto continue_unlock; 1493 1494 set_fsync_mark(page, 0); 1495 set_dentry_mark(page, 0); 1496 1497 if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) 1498 unlock_page(page); 1499 else 1500 nwritten++; 1501 1502 if (--wbc->nr_to_write == 0) 1503 break; 1504 } 1505 
pagevec_release(&pvec); 1506 cond_resched(); 1507 1508 if (wbc->nr_to_write == 0) { 1509 step = 2; 1510 break; 1511 } 1512 } 1513 1514 if (step < 2) { 1515 step++; 1516 goto next_step; 1517 } 1518 out: 1519 if (nwritten) 1520 f2fs_submit_merged_bio(sbi, NODE, WRITE); 1521 return ret; 1522 } 1523 1524 int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) 1525 { 1526 pgoff_t index = 0, end = ULONG_MAX; 1527 struct pagevec pvec; 1528 int ret2 = 0, ret = 0; 1529 1530 pagevec_init(&pvec, 0); 1531 1532 while (index <= end) { 1533 int i, nr_pages; 1534 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, 1535 PAGECACHE_TAG_WRITEBACK, 1536 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 1537 if (nr_pages == 0) 1538 break; 1539 1540 for (i = 0; i < nr_pages; i++) { 1541 struct page *page = pvec.pages[i]; 1542 1543 /* until radix tree lookup accepts end_index */ 1544 if (unlikely(page->index > end)) 1545 continue; 1546 1547 if (ino && ino_of_node(page) == ino) { 1548 f2fs_wait_on_page_writeback(page, NODE, true); 1549 if (TestClearPageError(page)) 1550 ret = -EIO; 1551 } 1552 } 1553 pagevec_release(&pvec); 1554 cond_resched(); 1555 } 1556 1557 if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags))) 1558 ret2 = -ENOSPC; 1559 if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags))) 1560 ret2 = -EIO; 1561 if (!ret) 1562 ret = ret2; 1563 return ret; 1564 } 1565 1566 static int f2fs_write_node_page(struct page *page, 1567 struct writeback_control *wbc) 1568 { 1569 struct f2fs_sb_info *sbi = F2FS_P_SB(page); 1570 nid_t nid; 1571 struct node_info ni; 1572 struct f2fs_io_info fio = { 1573 .sbi = sbi, 1574 .type = NODE, 1575 .op = REQ_OP_WRITE, 1576 .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? 
WRITE_SYNC : 0, 1577 .page = page, 1578 .encrypted_page = NULL, 1579 }; 1580 1581 trace_f2fs_writepage(page, NODE); 1582 1583 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 1584 goto redirty_out; 1585 if (unlikely(f2fs_cp_error(sbi))) 1586 goto redirty_out; 1587 1588 /* get old block addr of this node page */ 1589 nid = nid_of_node(page); 1590 f2fs_bug_on(sbi, page->index != nid); 1591 1592 if (wbc->for_reclaim) { 1593 if (!down_read_trylock(&sbi->node_write)) 1594 goto redirty_out; 1595 } else { 1596 down_read(&sbi->node_write); 1597 } 1598 1599 get_node_info(sbi, nid, &ni); 1600 1601 /* This page is already truncated */ 1602 if (unlikely(ni.blk_addr == NULL_ADDR)) { 1603 ClearPageUptodate(page); 1604 dec_page_count(sbi, F2FS_DIRTY_NODES); 1605 up_read(&sbi->node_write); 1606 unlock_page(page); 1607 return 0; 1608 } 1609 1610 set_page_writeback(page); 1611 fio.old_blkaddr = ni.blk_addr; 1612 write_node_page(nid, &fio); 1613 set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); 1614 dec_page_count(sbi, F2FS_DIRTY_NODES); 1615 up_read(&sbi->node_write); 1616 1617 if (wbc->for_reclaim) 1618 f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE); 1619 1620 unlock_page(page); 1621 1622 if (unlikely(f2fs_cp_error(sbi))) 1623 f2fs_submit_merged_bio(sbi, NODE, WRITE); 1624 1625 return 0; 1626 1627 redirty_out: 1628 redirty_page_for_writepage(wbc, page); 1629 return AOP_WRITEPAGE_ACTIVATE; 1630 } 1631 1632 static int f2fs_write_node_pages(struct address_space *mapping, 1633 struct writeback_control *wbc) 1634 { 1635 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); 1636 struct blk_plug plug; 1637 long diff; 1638 1639 /* balancing f2fs's metadata in background */ 1640 f2fs_balance_fs_bg(sbi); 1641 1642 /* collect a number of dirty node pages and write together */ 1643 if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE)) 1644 goto skip_write; 1645 1646 trace_f2fs_writepages(mapping->host, wbc, NODE); 1647 1648 diff = nr_pages_to_write(sbi, 
NODE, wbc); 1649 wbc->sync_mode = WB_SYNC_NONE; 1650 blk_start_plug(&plug); 1651 sync_node_pages(sbi, wbc); 1652 blk_finish_plug(&plug); 1653 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); 1654 return 0; 1655 1656 skip_write: 1657 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES); 1658 trace_f2fs_writepages(mapping->host, wbc, NODE); 1659 return 0; 1660 } 1661 1662 static int f2fs_set_node_page_dirty(struct page *page) 1663 { 1664 trace_f2fs_set_page_dirty(page, NODE); 1665 1666 if (!PageUptodate(page)) 1667 SetPageUptodate(page); 1668 if (!PageDirty(page)) { 1669 f2fs_set_page_dirty_nobuffers(page); 1670 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); 1671 SetPagePrivate(page); 1672 f2fs_trace_pid(page); 1673 return 1; 1674 } 1675 return 0; 1676 } 1677 1678 /* 1679 * Structure of the f2fs node operations 1680 */ 1681 const struct address_space_operations f2fs_node_aops = { 1682 .writepage = f2fs_write_node_page, 1683 .writepages = f2fs_write_node_pages, 1684 .set_page_dirty = f2fs_set_node_page_dirty, 1685 .invalidatepage = f2fs_invalidate_page, 1686 .releasepage = f2fs_release_page, 1687 #ifdef CONFIG_MIGRATION 1688 .migratepage = f2fs_migrate_page, 1689 #endif 1690 }; 1691 1692 static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, 1693 nid_t n) 1694 { 1695 return radix_tree_lookup(&nm_i->free_nid_root, n); 1696 } 1697 1698 static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i, 1699 struct free_nid *i) 1700 { 1701 list_del(&i->list); 1702 radix_tree_delete(&nm_i->free_nid_root, i->nid); 1703 } 1704 1705 static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) 1706 { 1707 struct f2fs_nm_info *nm_i = NM_I(sbi); 1708 struct free_nid *i; 1709 struct nat_entry *ne; 1710 1711 if (!available_free_memory(sbi, FREE_NIDS)) 1712 return -1; 1713 1714 /* 0 nid should not be used */ 1715 if (unlikely(nid == 0)) 1716 return 0; 1717 1718 if (build) { 1719 /* do not add allocated nids */ 1720 ne = 
__lookup_nat_cache(nm_i, nid); 1721 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || 1722 nat_get_blkaddr(ne) != NULL_ADDR)) 1723 return 0; 1724 } 1725 1726 i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); 1727 i->nid = nid; 1728 i->state = NID_NEW; 1729 1730 if (radix_tree_preload(GFP_NOFS)) { 1731 kmem_cache_free(free_nid_slab, i); 1732 return 0; 1733 } 1734 1735 spin_lock(&nm_i->free_nid_list_lock); 1736 if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) { 1737 spin_unlock(&nm_i->free_nid_list_lock); 1738 radix_tree_preload_end(); 1739 kmem_cache_free(free_nid_slab, i); 1740 return 0; 1741 } 1742 list_add_tail(&i->list, &nm_i->free_nid_list); 1743 nm_i->fcnt++; 1744 spin_unlock(&nm_i->free_nid_list_lock); 1745 radix_tree_preload_end(); 1746 return 1; 1747 } 1748 1749 static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) 1750 { 1751 struct free_nid *i; 1752 bool need_free = false; 1753 1754 spin_lock(&nm_i->free_nid_list_lock); 1755 i = __lookup_free_nid_list(nm_i, nid); 1756 if (i && i->state == NID_NEW) { 1757 __del_from_free_nid_list(nm_i, i); 1758 nm_i->fcnt--; 1759 need_free = true; 1760 } 1761 spin_unlock(&nm_i->free_nid_list_lock); 1762 1763 if (need_free) 1764 kmem_cache_free(free_nid_slab, i); 1765 } 1766 1767 static void scan_nat_page(struct f2fs_sb_info *sbi, 1768 struct page *nat_page, nid_t start_nid) 1769 { 1770 struct f2fs_nm_info *nm_i = NM_I(sbi); 1771 struct f2fs_nat_block *nat_blk = page_address(nat_page); 1772 block_t blk_addr; 1773 int i; 1774 1775 i = start_nid % NAT_ENTRY_PER_BLOCK; 1776 1777 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { 1778 1779 if (unlikely(start_nid >= nm_i->max_nid)) 1780 break; 1781 1782 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); 1783 f2fs_bug_on(sbi, blk_addr == NEW_ADDR); 1784 if (blk_addr == NULL_ADDR) { 1785 if (add_free_nid(sbi, start_nid, true) < 0) 1786 break; 1787 } 1788 } 1789 } 1790 1791 void build_free_nids(struct f2fs_sb_info *sbi) 1792 { 1793 struct f2fs_nm_info 
*nm_i = NM_I(sbi); 1794 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1795 struct f2fs_journal *journal = curseg->journal; 1796 int i = 0; 1797 nid_t nid = nm_i->next_scan_nid; 1798 1799 /* Enough entries */ 1800 if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK) 1801 return; 1802 1803 /* readahead nat pages to be scanned */ 1804 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, 1805 META_NAT, true); 1806 1807 down_read(&nm_i->nat_tree_lock); 1808 1809 while (1) { 1810 struct page *page = get_current_nat_page(sbi, nid); 1811 1812 scan_nat_page(sbi, page, nid); 1813 f2fs_put_page(page, 1); 1814 1815 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); 1816 if (unlikely(nid >= nm_i->max_nid)) 1817 nid = 0; 1818 1819 if (++i >= FREE_NID_PAGES) 1820 break; 1821 } 1822 1823 /* go to the next free nat pages to find free nids abundantly */ 1824 nm_i->next_scan_nid = nid; 1825 1826 /* find free nids from current sum_pages */ 1827 down_read(&curseg->journal_rwsem); 1828 for (i = 0; i < nats_in_cursum(journal); i++) { 1829 block_t addr; 1830 1831 addr = le32_to_cpu(nat_in_journal(journal, i).block_addr); 1832 nid = le32_to_cpu(nid_in_journal(journal, i)); 1833 if (addr == NULL_ADDR) 1834 add_free_nid(sbi, nid, true); 1835 else 1836 remove_free_nid(nm_i, nid); 1837 } 1838 up_read(&curseg->journal_rwsem); 1839 up_read(&nm_i->nat_tree_lock); 1840 1841 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), 1842 nm_i->ra_nid_pages, META_NAT, false); 1843 } 1844 1845 /* 1846 * If this function returns success, caller can obtain a new nid 1847 * from second parameter of this function. 1848 * The returned nid could be used ino as well as nid when inode is created. 
1849 */ 1850 bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) 1851 { 1852 struct f2fs_nm_info *nm_i = NM_I(sbi); 1853 struct free_nid *i = NULL; 1854 retry: 1855 #ifdef CONFIG_F2FS_FAULT_INJECTION 1856 if (time_to_inject(sbi, FAULT_ALLOC_NID)) 1857 return false; 1858 #endif 1859 if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids)) 1860 return false; 1861 1862 spin_lock(&nm_i->free_nid_list_lock); 1863 1864 /* We should not use stale free nids created by build_free_nids */ 1865 if (nm_i->fcnt && !on_build_free_nids(nm_i)) { 1866 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); 1867 list_for_each_entry(i, &nm_i->free_nid_list, list) 1868 if (i->state == NID_NEW) 1869 break; 1870 1871 f2fs_bug_on(sbi, i->state != NID_NEW); 1872 *nid = i->nid; 1873 i->state = NID_ALLOC; 1874 nm_i->fcnt--; 1875 spin_unlock(&nm_i->free_nid_list_lock); 1876 return true; 1877 } 1878 spin_unlock(&nm_i->free_nid_list_lock); 1879 1880 /* Let's scan nat pages and its caches to get free nids */ 1881 mutex_lock(&nm_i->build_lock); 1882 build_free_nids(sbi); 1883 mutex_unlock(&nm_i->build_lock); 1884 goto retry; 1885 } 1886 1887 /* 1888 * alloc_nid() should be called prior to this function. 1889 */ 1890 void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) 1891 { 1892 struct f2fs_nm_info *nm_i = NM_I(sbi); 1893 struct free_nid *i; 1894 1895 spin_lock(&nm_i->free_nid_list_lock); 1896 i = __lookup_free_nid_list(nm_i, nid); 1897 f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); 1898 __del_from_free_nid_list(nm_i, i); 1899 spin_unlock(&nm_i->free_nid_list_lock); 1900 1901 kmem_cache_free(free_nid_slab, i); 1902 } 1903 1904 /* 1905 * alloc_nid() should be called prior to this function. 
1906 */ 1907 void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) 1908 { 1909 struct f2fs_nm_info *nm_i = NM_I(sbi); 1910 struct free_nid *i; 1911 bool need_free = false; 1912 1913 if (!nid) 1914 return; 1915 1916 spin_lock(&nm_i->free_nid_list_lock); 1917 i = __lookup_free_nid_list(nm_i, nid); 1918 f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); 1919 if (!available_free_memory(sbi, FREE_NIDS)) { 1920 __del_from_free_nid_list(nm_i, i); 1921 need_free = true; 1922 } else { 1923 i->state = NID_NEW; 1924 nm_i->fcnt++; 1925 } 1926 spin_unlock(&nm_i->free_nid_list_lock); 1927 1928 if (need_free) 1929 kmem_cache_free(free_nid_slab, i); 1930 } 1931 1932 int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) 1933 { 1934 struct f2fs_nm_info *nm_i = NM_I(sbi); 1935 struct free_nid *i, *next; 1936 int nr = nr_shrink; 1937 1938 if (nm_i->fcnt <= MAX_FREE_NIDS) 1939 return 0; 1940 1941 if (!mutex_trylock(&nm_i->build_lock)) 1942 return 0; 1943 1944 spin_lock(&nm_i->free_nid_list_lock); 1945 list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { 1946 if (nr_shrink <= 0 || nm_i->fcnt <= MAX_FREE_NIDS) 1947 break; 1948 if (i->state == NID_ALLOC) 1949 continue; 1950 __del_from_free_nid_list(nm_i, i); 1951 kmem_cache_free(free_nid_slab, i); 1952 nm_i->fcnt--; 1953 nr_shrink--; 1954 } 1955 spin_unlock(&nm_i->free_nid_list_lock); 1956 mutex_unlock(&nm_i->build_lock); 1957 1958 return nr - nr_shrink; 1959 } 1960 1961 void recover_inline_xattr(struct inode *inode, struct page *page) 1962 { 1963 void *src_addr, *dst_addr; 1964 size_t inline_size; 1965 struct page *ipage; 1966 struct f2fs_inode *ri; 1967 1968 ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); 1969 f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage)); 1970 1971 ri = F2FS_INODE(page); 1972 if (!(ri->i_inline & F2FS_INLINE_XATTR)) { 1973 clear_inode_flag(inode, FI_INLINE_XATTR); 1974 goto update_inode; 1975 } 1976 1977 dst_addr = inline_xattr_addr(ipage); 1978 src_addr = inline_xattr_addr(page); 1979 
inline_size = inline_xattr_size(inode); 1980 1981 f2fs_wait_on_page_writeback(ipage, NODE, true); 1982 memcpy(dst_addr, src_addr, inline_size); 1983 update_inode: 1984 update_inode(inode, ipage); 1985 f2fs_put_page(ipage, 1); 1986 } 1987 1988 void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) 1989 { 1990 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1991 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; 1992 nid_t new_xnid = nid_of_node(page); 1993 struct node_info ni; 1994 1995 /* 1: invalidate the previous xattr nid */ 1996 if (!prev_xnid) 1997 goto recover_xnid; 1998 1999 /* Deallocate node address */ 2000 get_node_info(sbi, prev_xnid, &ni); 2001 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); 2002 invalidate_blocks(sbi, ni.blk_addr); 2003 dec_valid_node_count(sbi, inode); 2004 set_node_addr(sbi, &ni, NULL_ADDR, false); 2005 2006 recover_xnid: 2007 /* 2: allocate new xattr nid */ 2008 if (unlikely(!inc_valid_node_count(sbi, inode))) 2009 f2fs_bug_on(sbi, 1); 2010 2011 remove_free_nid(NM_I(sbi), new_xnid); 2012 get_node_info(sbi, new_xnid, &ni); 2013 ni.ino = inode->i_ino; 2014 set_node_addr(sbi, &ni, NEW_ADDR, false); 2015 f2fs_i_xnid_write(inode, new_xnid); 2016 2017 /* 3: update xattr blkaddr */ 2018 refresh_sit_entry(sbi, NEW_ADDR, blkaddr); 2019 set_node_addr(sbi, &ni, blkaddr, false); 2020 } 2021 2022 int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) 2023 { 2024 struct f2fs_inode *src, *dst; 2025 nid_t ino = ino_of_node(page); 2026 struct node_info old_ni, new_ni; 2027 struct page *ipage; 2028 2029 get_node_info(sbi, ino, &old_ni); 2030 2031 if (unlikely(old_ni.blk_addr != NULL_ADDR)) 2032 return -EINVAL; 2033 retry: 2034 ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); 2035 if (!ipage) { 2036 congestion_wait(BLK_RW_ASYNC, HZ/50); 2037 goto retry; 2038 } 2039 2040 /* Should not use this inode from free nid list */ 2041 remove_free_nid(NM_I(sbi), ino); 2042 2043 if (!PageUptodate(ipage)) 2044 
SetPageUptodate(ipage); 2045 fill_node_footer(ipage, ino, ino, 0, true); 2046 2047 src = F2FS_INODE(page); 2048 dst = F2FS_INODE(ipage); 2049 2050 memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src); 2051 dst->i_size = 0; 2052 dst->i_blocks = cpu_to_le64(1); 2053 dst->i_links = cpu_to_le32(1); 2054 dst->i_xattr_nid = 0; 2055 dst->i_inline = src->i_inline & F2FS_INLINE_XATTR; 2056 2057 new_ni = old_ni; 2058 new_ni.ino = ino; 2059 2060 if (unlikely(!inc_valid_node_count(sbi, NULL))) 2061 WARN_ON(1); 2062 set_node_addr(sbi, &new_ni, NEW_ADDR, false); 2063 inc_valid_inode_count(sbi); 2064 set_page_dirty(ipage); 2065 f2fs_put_page(ipage, 1); 2066 return 0; 2067 } 2068 2069 int restore_node_summary(struct f2fs_sb_info *sbi, 2070 unsigned int segno, struct f2fs_summary_block *sum) 2071 { 2072 struct f2fs_node *rn; 2073 struct f2fs_summary *sum_entry; 2074 block_t addr; 2075 int bio_blocks = MAX_BIO_BLOCKS(sbi); 2076 int i, idx, last_offset, nrpages; 2077 2078 /* scan the node segment */ 2079 last_offset = sbi->blocks_per_seg; 2080 addr = START_BLOCK(sbi, segno); 2081 sum_entry = &sum->entries[0]; 2082 2083 for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { 2084 nrpages = min(last_offset - i, bio_blocks); 2085 2086 /* readahead node pages */ 2087 ra_meta_pages(sbi, addr, nrpages, META_POR, true); 2088 2089 for (idx = addr; idx < addr + nrpages; idx++) { 2090 struct page *page = get_tmp_page(sbi, idx); 2091 2092 rn = F2FS_NODE(page); 2093 sum_entry->nid = rn->footer.nid; 2094 sum_entry->version = 0; 2095 sum_entry->ofs_in_node = 0; 2096 sum_entry++; 2097 f2fs_put_page(page, 1); 2098 } 2099 2100 invalidate_mapping_pages(META_MAPPING(sbi), addr, 2101 addr + nrpages); 2102 } 2103 return 0; 2104 } 2105 2106 static void remove_nats_in_journal(struct f2fs_sb_info *sbi) 2107 { 2108 struct f2fs_nm_info *nm_i = NM_I(sbi); 2109 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 2110 struct f2fs_journal *journal = curseg->journal; 2111 int i; 2112 
2113 down_write(&curseg->journal_rwsem); 2114 for (i = 0; i < nats_in_cursum(journal); i++) { 2115 struct nat_entry *ne; 2116 struct f2fs_nat_entry raw_ne; 2117 nid_t nid = le32_to_cpu(nid_in_journal(journal, i)); 2118 2119 raw_ne = nat_in_journal(journal, i); 2120 2121 ne = __lookup_nat_cache(nm_i, nid); 2122 if (!ne) { 2123 ne = grab_nat_entry(nm_i, nid); 2124 node_info_from_raw_nat(&ne->ni, &raw_ne); 2125 } 2126 __set_nat_cache_dirty(nm_i, ne); 2127 } 2128 update_nats_in_cursum(journal, -i); 2129 up_write(&curseg->journal_rwsem); 2130 } 2131 2132 static void __adjust_nat_entry_set(struct nat_entry_set *nes, 2133 struct list_head *head, int max) 2134 { 2135 struct nat_entry_set *cur; 2136 2137 if (nes->entry_cnt >= max) 2138 goto add_out; 2139 2140 list_for_each_entry(cur, head, set_list) { 2141 if (cur->entry_cnt >= nes->entry_cnt) { 2142 list_add(&nes->set_list, cur->set_list.prev); 2143 return; 2144 } 2145 } 2146 add_out: 2147 list_add_tail(&nes->set_list, head); 2148 } 2149 2150 static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, 2151 struct nat_entry_set *set) 2152 { 2153 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 2154 struct f2fs_journal *journal = curseg->journal; 2155 nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK; 2156 bool to_journal = true; 2157 struct f2fs_nat_block *nat_blk; 2158 struct nat_entry *ne, *cur; 2159 struct page *page = NULL; 2160 2161 /* 2162 * there are two steps to flush nat entries: 2163 * #1, flush nat entries to journal in current hot data summary block. 2164 * #2, flush nat entries to nat page. 
2165 */ 2166 if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL)) 2167 to_journal = false; 2168 2169 if (to_journal) { 2170 down_write(&curseg->journal_rwsem); 2171 } else { 2172 page = get_next_nat_page(sbi, start_nid); 2173 nat_blk = page_address(page); 2174 f2fs_bug_on(sbi, !nat_blk); 2175 } 2176 2177 /* flush dirty nats in nat entry set */ 2178 list_for_each_entry_safe(ne, cur, &set->entry_list, list) { 2179 struct f2fs_nat_entry *raw_ne; 2180 nid_t nid = nat_get_nid(ne); 2181 int offset; 2182 2183 if (nat_get_blkaddr(ne) == NEW_ADDR) 2184 continue; 2185 2186 if (to_journal) { 2187 offset = lookup_journal_in_cursum(journal, 2188 NAT_JOURNAL, nid, 1); 2189 f2fs_bug_on(sbi, offset < 0); 2190 raw_ne = &nat_in_journal(journal, offset); 2191 nid_in_journal(journal, offset) = cpu_to_le32(nid); 2192 } else { 2193 raw_ne = &nat_blk->entries[nid - start_nid]; 2194 } 2195 raw_nat_from_node_info(raw_ne, &ne->ni); 2196 nat_reset_flag(ne); 2197 __clear_nat_cache_dirty(NM_I(sbi), ne); 2198 if (nat_get_blkaddr(ne) == NULL_ADDR) 2199 add_free_nid(sbi, nid, false); 2200 } 2201 2202 if (to_journal) 2203 up_write(&curseg->journal_rwsem); 2204 else 2205 f2fs_put_page(page, 1); 2206 2207 f2fs_bug_on(sbi, set->entry_cnt); 2208 2209 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); 2210 kmem_cache_free(nat_entry_set_slab, set); 2211 } 2212 2213 /* 2214 * This function is called during the checkpointing process. 
2215 */ 2216 void flush_nat_entries(struct f2fs_sb_info *sbi) 2217 { 2218 struct f2fs_nm_info *nm_i = NM_I(sbi); 2219 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 2220 struct f2fs_journal *journal = curseg->journal; 2221 struct nat_entry_set *setvec[SETVEC_SIZE]; 2222 struct nat_entry_set *set, *tmp; 2223 unsigned int found; 2224 nid_t set_idx = 0; 2225 LIST_HEAD(sets); 2226 2227 if (!nm_i->dirty_nat_cnt) 2228 return; 2229 2230 down_write(&nm_i->nat_tree_lock); 2231 2232 /* 2233 * if there are no enough space in journal to store dirty nat 2234 * entries, remove all entries from journal and merge them 2235 * into nat entry set. 2236 */ 2237 if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) 2238 remove_nats_in_journal(sbi); 2239 2240 while ((found = __gang_lookup_nat_set(nm_i, 2241 set_idx, SETVEC_SIZE, setvec))) { 2242 unsigned idx; 2243 set_idx = setvec[found - 1]->set + 1; 2244 for (idx = 0; idx < found; idx++) 2245 __adjust_nat_entry_set(setvec[idx], &sets, 2246 MAX_NAT_JENTRIES(journal)); 2247 } 2248 2249 /* flush dirty nats in nat entry set */ 2250 list_for_each_entry_safe(set, tmp, &sets, set_list) 2251 __flush_nat_entry_set(sbi, set); 2252 2253 up_write(&nm_i->nat_tree_lock); 2254 2255 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); 2256 } 2257 2258 static int init_node_manager(struct f2fs_sb_info *sbi) 2259 { 2260 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); 2261 struct f2fs_nm_info *nm_i = NM_I(sbi); 2262 unsigned char *version_bitmap; 2263 unsigned int nat_segs, nat_blocks; 2264 2265 nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr); 2266 2267 /* segment_count_nat includes pair segment so divide to 2. 
*/ 2268 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; 2269 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); 2270 2271 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; 2272 2273 /* not used nids: 0, node, meta, (and root counted as valid node) */ 2274 nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM; 2275 nm_i->fcnt = 0; 2276 nm_i->nat_cnt = 0; 2277 nm_i->ram_thresh = DEF_RAM_THRESHOLD; 2278 nm_i->ra_nid_pages = DEF_RA_NID_PAGES; 2279 nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; 2280 2281 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); 2282 INIT_LIST_HEAD(&nm_i->free_nid_list); 2283 INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); 2284 INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); 2285 INIT_LIST_HEAD(&nm_i->nat_entries); 2286 2287 mutex_init(&nm_i->build_lock); 2288 spin_lock_init(&nm_i->free_nid_list_lock); 2289 init_rwsem(&nm_i->nat_tree_lock); 2290 2291 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); 2292 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); 2293 version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); 2294 if (!version_bitmap) 2295 return -EFAULT; 2296 2297 nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, 2298 GFP_KERNEL); 2299 if (!nm_i->nat_bitmap) 2300 return -ENOMEM; 2301 return 0; 2302 } 2303 2304 int build_node_manager(struct f2fs_sb_info *sbi) 2305 { 2306 int err; 2307 2308 sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL); 2309 if (!sbi->nm_info) 2310 return -ENOMEM; 2311 2312 err = init_node_manager(sbi); 2313 if (err) 2314 return err; 2315 2316 build_free_nids(sbi); 2317 return 0; 2318 } 2319 2320 void destroy_node_manager(struct f2fs_sb_info *sbi) 2321 { 2322 struct f2fs_nm_info *nm_i = NM_I(sbi); 2323 struct free_nid *i, *next_i; 2324 struct nat_entry *natvec[NATVEC_SIZE]; 2325 struct nat_entry_set *setvec[SETVEC_SIZE]; 2326 nid_t nid = 0; 2327 unsigned int found; 2328 2329 if (!nm_i) 2330 return; 2331 2332 /* destroy free nid list */ 2333 
spin_lock(&nm_i->free_nid_list_lock); 2334 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { 2335 f2fs_bug_on(sbi, i->state == NID_ALLOC); 2336 __del_from_free_nid_list(nm_i, i); 2337 nm_i->fcnt--; 2338 spin_unlock(&nm_i->free_nid_list_lock); 2339 kmem_cache_free(free_nid_slab, i); 2340 spin_lock(&nm_i->free_nid_list_lock); 2341 } 2342 f2fs_bug_on(sbi, nm_i->fcnt); 2343 spin_unlock(&nm_i->free_nid_list_lock); 2344 2345 /* destroy nat cache */ 2346 down_write(&nm_i->nat_tree_lock); 2347 while ((found = __gang_lookup_nat_cache(nm_i, 2348 nid, NATVEC_SIZE, natvec))) { 2349 unsigned idx; 2350 2351 nid = nat_get_nid(natvec[found - 1]) + 1; 2352 for (idx = 0; idx < found; idx++) 2353 __del_from_nat_cache(nm_i, natvec[idx]); 2354 } 2355 f2fs_bug_on(sbi, nm_i->nat_cnt); 2356 2357 /* destroy nat set cache */ 2358 nid = 0; 2359 while ((found = __gang_lookup_nat_set(nm_i, 2360 nid, SETVEC_SIZE, setvec))) { 2361 unsigned idx; 2362 2363 nid = setvec[found - 1]->set + 1; 2364 for (idx = 0; idx < found; idx++) { 2365 /* entry_cnt is not zero, when cp_error was occurred */ 2366 f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list)); 2367 radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set); 2368 kmem_cache_free(nat_entry_set_slab, setvec[idx]); 2369 } 2370 } 2371 up_write(&nm_i->nat_tree_lock); 2372 2373 kfree(nm_i->nat_bitmap); 2374 sbi->nm_info = NULL; 2375 kfree(nm_i); 2376 } 2377 2378 int __init create_node_manager_caches(void) 2379 { 2380 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 2381 sizeof(struct nat_entry)); 2382 if (!nat_entry_slab) 2383 goto fail; 2384 2385 free_nid_slab = f2fs_kmem_cache_create("free_nid", 2386 sizeof(struct free_nid)); 2387 if (!free_nid_slab) 2388 goto destroy_nat_entry; 2389 2390 nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", 2391 sizeof(struct nat_entry_set)); 2392 if (!nat_entry_set_slab) 2393 goto destroy_free_nid; 2394 return 0; 2395 2396 destroy_free_nid: 2397 kmem_cache_destroy(free_nid_slab); 2398 
destroy_nat_entry: 2399 kmem_cache_destroy(nat_entry_slab); 2400 fail: 2401 return -ENOMEM; 2402 } 2403 2404 void destroy_node_manager_caches(void) 2405 { 2406 kmem_cache_destroy(nat_entry_set_slab); 2407 kmem_cache_destroy(free_nid_slab); 2408 kmem_cache_destroy(nat_entry_slab); 2409 } 2410