/*
 * fs/f2fs/gc.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/f2fs_fs.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/freezer.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "gc.h"
#include <trace/events/f2fs.h>

static int gc_thread_func(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
	long wait_ms;

	wait_ms = gc_th->min_sleep_time;

	do {
		if (try_to_freeze())
			continue;
		else
			wait_event_interruptible_timeout(*wq,
						kthread_should_stop(),
						msecs_to_jiffies(wait_ms));
		if (kthread_should_stop())
			break;

		if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
			increase_sleep_time(gc_th, &wait_ms);
			continue;
		}

		/*
		 * [GC triggering condition]
		 * 0. GC is not conducted currently.
		 * 1. There are enough dirty segments.
		 * 2. IO subsystem is idle by checking the # of writeback pages.
		 * 3. IO subsystem is idle by checking the # of requests in
		 *    bdev's request list.
		 *
		 * Note) We have to avoid triggering GCs frequently.
		 * Because it is possible that some segments can be
		 * invalidated soon after by user update or deletion.
		 * So, I'd like to wait some time to collect dirty segments.
		 */
		if (!mutex_trylock(&sbi->gc_mutex))
			continue;

		if (!is_idle(sbi)) {
			increase_sleep_time(gc_th, &wait_ms);
			mutex_unlock(&sbi->gc_mutex);
			continue;
		}

		if (has_enough_invalid_blocks(sbi))
			decrease_sleep_time(gc_th, &wait_ms);
		else
			increase_sleep_time(gc_th, &wait_ms);

		stat_inc_bggc_count(sbi);

		/* if return value is not zero, no victim was selected */
		if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC)))
			wait_ms = gc_th->no_gc_sleep_time;

		trace_f2fs_background_gc(sbi->sb, wait_ms,
				prefree_segments(sbi), free_segments(sbi));

		/* balancing f2fs's metadata periodically */
		f2fs_balance_fs_bg(sbi);

	} while (!kthread_should_stop());
	return 0;
}

int start_gc_thread(struct f2fs_sb_info *sbi)
{
	struct f2fs_gc_kthread *gc_th;
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	int err = 0;

	gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
	if (!gc_th) {
		err = -ENOMEM;
		goto out;
	}

	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
	gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
	gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;

	gc_th->gc_idle = 0;

	sbi->gc_thread = gc_th;
	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(gc_th->f2fs_gc_task)) {
		err = PTR_ERR(gc_th->f2fs_gc_task);
		kfree(gc_th);
		sbi->gc_thread = NULL;
	}
out:
	return err;
}

void stop_gc_thread(struct f2fs_sb_info *sbi)
{
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
	if (!gc_th)
		return;
	kthread_stop(gc_th->f2fs_gc_task);
	kfree(gc_th);
	sbi->gc_thread = NULL;
}
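/*
 * The sleep-time helpers used by gc_thread_func() above live in gc.h.
 * As a rough sketch (not the canonical implementation), they are
 * expected to step wait_ms by min_sleep_time and clamp it to the
 * configured window:
 *
 *	increase_sleep_time: wait_ms += min_sleep_time,
 *			     capped at max_sleep_time
 *	decrease_sleep_time: wait_ms -= min_sleep_time,
 *			     floored at min_sleep_time
 *
 * The net effect is a linear backoff: a busy filesystem, or one with
 * few invalid blocks, pushes the GC thread toward max_sleep_time,
 * while plenty of invalid blocks pulls it back toward min_sleep_time.
 */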
static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
{
	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;

	if (gc_th && gc_th->gc_idle) {
		if (gc_th->gc_idle == 1)
			gc_mode = GC_CB;
		else if (gc_th->gc_idle == 2)
			gc_mode = GC_GREEDY;
	}
	return gc_mode;
}

static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
			int type, struct victim_sel_policy *p)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	if (p->alloc_mode == SSR) {
		p->gc_mode = GC_GREEDY;
		p->dirty_segmap = dirty_i->dirty_segmap[type];
		p->max_search = dirty_i->nr_dirty[type];
		p->ofs_unit = 1;
	} else {
		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
		p->max_search = dirty_i->nr_dirty[DIRTY];
		p->ofs_unit = sbi->segs_per_sec;
	}

	if (p->max_search > sbi->max_victim_search)
		p->max_search = sbi->max_victim_search;

	p->offset = sbi->last_victim[p->gc_mode];
}

static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
				struct victim_sel_policy *p)
{
	/* SSR allocates in a segment unit */
	if (p->alloc_mode == SSR)
		return sbi->blocks_per_seg;
	if (p->gc_mode == GC_GREEDY)
		return sbi->blocks_per_seg * p->ofs_unit;
	else if (p->gc_mode == GC_CB)
		return UINT_MAX;
	else /* No other gc_mode */
		return 0;
}

static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int secno;

	/*
	 * If the gc_type is FG_GC, we can select the victim segments
	 * selected by background GC before.
	 * Those segments are guaranteed to have few valid blocks.
	 */
	for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
		if (sec_usage_check(sbi, secno))
			continue;
		clear_bit(secno, dirty_i->victim_secmap);
		return secno * sbi->segs_per_sec;
	}
	return NULL_SEGNO;
}

static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int secno = GET_SECNO(sbi, segno);
	unsigned int start = secno * sbi->segs_per_sec;
	unsigned long long mtime = 0;
	unsigned int vblocks;
	unsigned char age = 0;
	unsigned char u;
	unsigned int i;

	for (i = 0; i < sbi->segs_per_sec; i++)
		mtime += get_seg_entry(sbi, start + i)->mtime;
	vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);

	mtime = div_u64(mtime, sbi->segs_per_sec);
	vblocks = div_u64(vblocks, sbi->segs_per_sec);

	u = (vblocks * 100) >> sbi->log_blocks_per_seg;

	/* Handle the case where the system time was changed by the user */
	if (mtime < sit_i->min_mtime)
		sit_i->min_mtime = mtime;
	if (mtime > sit_i->max_mtime)
		sit_i->max_mtime = mtime;
	if (sit_i->max_mtime != sit_i->min_mtime)
		age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime),
				sit_i->max_mtime - sit_i->min_mtime);

	return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}

static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
			unsigned int segno, struct victim_sel_policy *p)
{
	if (p->alloc_mode == SSR)
		return get_seg_entry(sbi, segno)->ckpt_valid_blocks;

	/* alloc_mode == LFS */
	if (p->gc_mode == GC_GREEDY)
		return get_valid_blocks(sbi, segno, sbi->segs_per_sec);
	else
		return get_cb_cost(sbi, segno);
}
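/*
 * Worked example for the cost-benefit formula in get_cb_cost() above:
 * the classic LFS benefit, age * (1 - u) / (1 + u), is scaled by 100
 * and subtracted from UINT_MAX so that the victim search, which
 * minimizes cost, prefers the highest benefit. With u = 20 (20% of the
 * section's blocks still valid) and age = 50:
 *
 *	cost = UINT_MAX - (100 * (100 - 20) * 50) / (100 + 20)
 *	     = UINT_MAX - 3333
 *
 * A fully valid section (u = 100) degenerates to exactly UINT_MAX, so
 * it is never chosen over any section with at least one invalid block.
 */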
/*
 * This function is called from two paths.
 * One is garbage collection and the other is SSR segment selection.
 * When called for GC, it just gets a victim segment and does not
 * remove it from the dirty seglist.
 * When called for SSR segment selection, it finds the segment with the
 * fewest valid blocks and removes it from the dirty seglist.
 */
static int get_victim_by_default(struct f2fs_sb_info *sbi,
		unsigned int *result, int gc_type, int type, char alloc_mode)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct victim_sel_policy p;
	unsigned int secno, max_cost;
	unsigned int last_segment = MAIN_SEGS(sbi);
	int nsearched = 0;

	mutex_lock(&dirty_i->seglist_lock);

	p.alloc_mode = alloc_mode;
	select_policy(sbi, gc_type, type, &p);

	p.min_segno = NULL_SEGNO;
	p.min_cost = max_cost = get_max_cost(sbi, &p);

	if (p.max_search == 0)
		goto out;

	if (p.alloc_mode == LFS && gc_type == FG_GC) {
		p.min_segno = check_bg_victims(sbi);
		if (p.min_segno != NULL_SEGNO)
			goto got_it;
	}

	while (1) {
		unsigned long cost;
		unsigned int segno;

		segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
		if (segno >= last_segment) {
			if (sbi->last_victim[p.gc_mode]) {
				last_segment = sbi->last_victim[p.gc_mode];
				sbi->last_victim[p.gc_mode] = 0;
				p.offset = 0;
				continue;
			}
			break;
		}

		p.offset = segno + p.ofs_unit;
		if (p.ofs_unit > 1)
			p.offset -= segno % p.ofs_unit;

		secno = GET_SECNO(sbi, segno);

		if (sec_usage_check(sbi, secno))
			continue;
		if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
			continue;

		cost = get_gc_cost(sbi, segno, &p);

		if (p.min_cost > cost) {
			p.min_segno = segno;
			p.min_cost = cost;
		} else if (unlikely(cost == max_cost)) {
			continue;
		}

		if (nsearched++ >= p.max_search) {
			sbi->last_victim[p.gc_mode] = segno;
			break;
		}
	}
	if (p.min_segno != NULL_SEGNO) {
got_it:
		if (p.alloc_mode == LFS) {
			secno = GET_SECNO(sbi, p.min_segno);
			if (gc_type == FG_GC)
				sbi->cur_victim_sec = secno;
			else
				set_bit(secno, dirty_i->victim_secmap);
		}
		*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;

		trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
				sbi->cur_victim_sec,
				prefree_segments(sbi), free_segments(sbi));
	}
out:
	mutex_unlock(&dirty_i->seglist_lock);

	return (p.min_segno == NULL_SEGNO) ? 0 : 1;
}
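/*
 * Note on the search order above: the scan starts at
 * sbi->last_victim[gc_mode] and runs to the end of the main area; if
 * nothing is found there, last_segment is pulled back to the old
 * starting point and the scan wraps around from segment 0. Each dirty
 * segment is therefore visited at most once per call, and the cursor
 * is persisted across calls whenever the search budget (max_search)
 * runs out first.
 */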
static const struct victim_selection default_v_ops = {
	.get_victim = get_victim_by_default,
};

static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino)
{
	struct inode_entry *ie;

	ie = radix_tree_lookup(&gc_list->iroot, ino);
	if (ie)
		return ie->inode;
	return NULL;
}

static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
{
	struct inode_entry *new_ie;

	if (inode == find_gc_inode(gc_list, inode->i_ino)) {
		iput(inode);
		return;
	}
	new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
	new_ie->inode = inode;

	f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie);
	list_add_tail(&new_ie->list, &gc_list->ilist);
}

static void put_gc_inode(struct gc_inode_list *gc_list)
{
	struct inode_entry *ie, *next_ie;
	list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) {
		radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
		iput(ie->inode);
		list_del(&ie->list);
		kmem_cache_free(inode_entry_slab, ie);
	}
}

static int check_valid_map(struct f2fs_sb_info *sbi,
				unsigned int segno, int offset)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct seg_entry *sentry;
	int ret;

	mutex_lock(&sit_i->sentry_lock);
	sentry = get_seg_entry(sbi, segno);
	ret = f2fs_test_bit(offset, sentry->cur_valid_map);
	mutex_unlock(&sit_i->sentry_lock);
	return ret;
}
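/*
 * gc_node_segment() below makes two passes over the victim's summary
 * entries: the first pass (initial == true) only issues readahead for
 * every live node page, and the second pass re-validates each block
 * and dirties its node page so that the writeback path relocates it.
 * Splitting the work this way batches the node-page reads before any
 * page is actually migrated.
 */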
/*
 * This function compares the node address recorded in the summary with
 * the one in the NAT. If they match, the node is copied with cold
 * status; otherwise the (invalid) node is ignored.
 */
static int gc_node_segment(struct f2fs_sb_info *sbi,
		struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
	bool initial = true;
	struct f2fs_summary *entry;
	block_t start_addr;
	int off;

	start_addr = START_BLOCK(sbi, segno);

next_step:
	entry = sum;

	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
		nid_t nid = le32_to_cpu(entry->nid);
		struct page *node_page;
		struct node_info ni;

		/* stop BG_GC if there are not enough free sections */
		if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
			return 0;

		if (check_valid_map(sbi, segno, off) == 0)
			continue;

		if (initial) {
			ra_node_page(sbi, nid);
			continue;
		}
		node_page = get_node_page(sbi, nid);
		if (IS_ERR(node_page))
			continue;

		/* block may become invalid during get_node_page */
		if (check_valid_map(sbi, segno, off) == 0) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		get_node_info(sbi, nid, &ni);
		if (ni.blk_addr != start_addr + off) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		/* set page dirty and write it */
		if (gc_type == FG_GC) {
			f2fs_wait_on_page_writeback(node_page, NODE);
			set_page_dirty(node_page);
		} else {
			if (!PageWriteback(node_page))
				set_page_dirty(node_page);
		}
		f2fs_put_page(node_page, 1);
		stat_inc_node_blk_count(sbi, 1, gc_type);
	}

	if (initial) {
		initial = false;
		goto next_step;
	}

	if (gc_type == FG_GC) {
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_ALL,
			.nr_to_write = LONG_MAX,
			.for_reclaim = 0,
		};
		sync_node_pages(sbi, 0, &wbc);

		/* return 1 only if FG_GC successfully reclaimed one */
		if (get_valid_blocks(sbi, segno, 1) == 0)
			return 1;
	}
	return 0;
}

/*
 * Calculate the start block index corresponding to the given node offset.
 * Be careful: the caller must pass a node offset that refers only to
 * direct node blocks. Passing an offset that points to any other node
 * block type, such as an indirect or double indirect node block, is a
 * caller's bug.
 */
block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
{
	unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
	unsigned int bidx;

	if (node_ofs == 0)
		return 0;

	if (node_ofs <= 2) {
		bidx = node_ofs - 1;
	} else if (node_ofs <= indirect_blks) {
		int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
		bidx = node_ofs - 2 - dec;
	} else {
		int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
		bidx = node_ofs - 5 - dec;
	}
	return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi);
}

static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
		struct node_info *dni, block_t blkaddr, unsigned int *nofs)
{
	struct page *node_page;
	nid_t nid;
	unsigned int ofs_in_node;
	block_t source_blkaddr;

	nid = le32_to_cpu(sum->nid);
	ofs_in_node = le16_to_cpu(sum->ofs_in_node);

	node_page = get_node_page(sbi, nid);
	if (IS_ERR(node_page))
		return false;

	get_node_info(sbi, nid, dni);

	if (sum->version != dni->version) {
		f2fs_put_page(node_page, 1);
		return false;
	}

	*nofs = ofs_of_node(node_page);
	source_blkaddr = datablock_addr(node_page, ofs_in_node);
	f2fs_put_page(node_page, 1);

	if (source_blkaddr != blkaddr)
		return false;
	return true;
}
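/*
 * move_encrypted_block() below migrates an encrypted data block without
 * decrypting it: the ciphertext is read into the meta inode's page
 * cache (keyed by block address), a new block address is allocated in
 * the cold data log, and the ciphertext page is written out there,
 * presumably because GC cannot assume the owning file's encryption key
 * is available. Only the block address changes; the data never leaves
 * its encrypted form.
 */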
static void move_encrypted_block(struct inode *inode, block_t bidx)
{
	struct f2fs_io_info fio = {
		.sbi = F2FS_I_SB(inode),
		.type = DATA,
		.rw = READ_SYNC,
		.encrypted_page = NULL,
	};
	struct dnode_of_data dn;
	struct f2fs_summary sum;
	struct node_info ni;
	struct page *page;
	int err;

	/* do not read out */
	page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
	if (!page)
		return;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
	if (err)
		goto out;

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		ClearPageUptodate(page);
		goto put_out;
	}

	/*
	 * don't cache encrypted data in the meta inode until the previous
	 * dirty data has been written back, to avoid racing between GC
	 * and flush.
	 */
	f2fs_wait_on_page_writeback(page, DATA);

	get_node_info(fio.sbi, dn.nid, &ni);
	set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

	/* read page */
	fio.page = page;
	fio.blk_addr = dn.data_blkaddr;

	fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi),
					fio.blk_addr,
					FGP_LOCK|FGP_CREAT,
					GFP_NOFS);
	if (!fio.encrypted_page)
		goto put_out;

	err = f2fs_submit_page_bio(&fio);
	if (err)
		goto put_page_out;

	/* write page */
	lock_page(fio.encrypted_page);

	if (unlikely(!PageUptodate(fio.encrypted_page)))
		goto put_page_out;
	if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi)))
		goto put_page_out;

	set_page_dirty(fio.encrypted_page);
	f2fs_wait_on_page_writeback(fio.encrypted_page, DATA);
	if (clear_page_dirty_for_io(fio.encrypted_page))
		dec_page_count(fio.sbi, F2FS_DIRTY_META);

	set_page_writeback(fio.encrypted_page);

	/* allocate block address */
	f2fs_wait_on_page_writeback(dn.node_page, NODE);
	allocate_data_block(fio.sbi, NULL, fio.blk_addr,
					&fio.blk_addr, &sum, CURSEG_COLD_DATA);
	fio.rw = WRITE_SYNC;
	f2fs_submit_page_mbio(&fio);

	dn.data_blkaddr = fio.blk_addr;
	set_data_blkaddr(&dn);
	f2fs_update_extent_cache(&dn);
	set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
put_page_out:
	f2fs_put_page(fio.encrypted_page, 1);
put_out:
	f2fs_put_dnode(&dn);
out:
	f2fs_put_page(page, 1);
}

static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
{
	struct page *page;

	page = get_lock_data_page(inode, bidx, true);
	if (IS_ERR(page))
		return;

	if (gc_type == BG_GC) {
		if (PageWriteback(page))
			goto out;
		set_page_dirty(page);
		set_cold_data(page);
	} else {
		struct f2fs_io_info fio = {
			.sbi = F2FS_I_SB(inode),
			.type = DATA,
			.rw = WRITE_SYNC,
			.page = page,
			.encrypted_page = NULL,
		};
		set_page_dirty(page);
		f2fs_wait_on_page_writeback(page, DATA);
		if (clear_page_dirty_for_io(page))
			inode_dec_dirty_pages(inode);
		set_cold_data(page);
		do_write_data_page(&fio);
		clear_cold_data(page);
	}
out:
	f2fs_put_page(page, 1);
}
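/*
 * gc_data_segment() below walks the victim's summary entries in four
 * phases, restarting the walk at next_step for each one:
 *
 *	phase 0: readahead the node pages referenced by the summaries
 *	phase 1: after is_alive() checks, readahead the owners' inode pages
 *	phase 2: iget() each owner and readahead its data page
 *	phase 3: actually move the data blocks of the cached inodes
 *
 * As with node GC, this batches all metadata reads before any block
 * is migrated; the gc_inode_list carries the inode references from
 * phase 2 to phase 3.
 */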
/*
 * This function tries to get the parent node of a victim data block and
 * checks the block's validity. If the block is valid, it is copied with
 * cold status and the parent node is updated.
 * If the parent node is not valid or the data block address differs,
 * the victim data block is ignored.
 */
static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
		struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
{
	struct super_block *sb = sbi->sb;
	struct f2fs_summary *entry;
	block_t start_addr;
	int off;
	int phase = 0;

	start_addr = START_BLOCK(sbi, segno);

next_step:
	entry = sum;

	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
		struct page *data_page;
		struct inode *inode;
		struct node_info dni; /* dnode info for the data */
		unsigned int ofs_in_node, nofs;
		block_t start_bidx;

		/* stop BG_GC if there are not enough free sections */
		if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
			return 0;

		if (check_valid_map(sbi, segno, off) == 0)
			continue;

		if (phase == 0) {
			ra_node_page(sbi, le32_to_cpu(entry->nid));
			continue;
		}

		/* check the block is still alive before using its inode */
		if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
			continue;

		if (phase == 1) {
			ra_node_page(sbi, dni.ino);
			continue;
		}

		ofs_in_node = le16_to_cpu(entry->ofs_in_node);

		if (phase == 2) {
			inode = f2fs_iget(sb, dni.ino);
			if (IS_ERR(inode) || is_bad_inode(inode))
				continue;

			/* if encrypted inode, let's go to phase 3 */
			if (f2fs_encrypted_inode(inode) &&
					S_ISREG(inode->i_mode)) {
				add_gc_inode(gc_list, inode);
				continue;
			}

			start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
			data_page = get_read_data_page(inode,
					start_bidx + ofs_in_node, READA, true);
			if (IS_ERR(data_page)) {
				iput(inode);
				continue;
			}

			f2fs_put_page(data_page, 0);
			add_gc_inode(gc_list, inode);
			continue;
		}

		/* phase 3 */
		inode = find_gc_inode(gc_list, dni.ino);
		if (inode) {
			start_bidx = start_bidx_of_node(nofs, F2FS_I(inode))
								+ ofs_in_node;
			if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
				move_encrypted_block(inode, start_bidx);
			else
				move_data_page(inode, start_bidx, gc_type);
			stat_inc_data_blk_count(sbi, 1, gc_type);
		}
	}

	if (++phase < 4)
		goto next_step;

	if (gc_type == FG_GC) {
		f2fs_submit_merged_bio(sbi, DATA, WRITE);

		/* return 1 only if FG_GC successfully reclaimed one */
		if (get_valid_blocks(sbi, segno, 1) == 0)
			return 1;
	}
	return 0;
}

static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
			int gc_type)
{
	struct sit_info *sit_i = SIT_I(sbi);
	int ret;

	mutex_lock(&sit_i->sentry_lock);
	ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
					      NO_CHECK_TYPE, LFS);
	mutex_unlock(&sit_i->sentry_lock);
	return ret;
}
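/*
 * do_garbage_collect() below drives one victim segment. The on-disk
 * SSA (segment summary area) block records, for every block in the
 * segment, which node or which inode/offset owns it, and the summary
 * footer's type decides whether the node or the data path is taken.
 */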
static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
				struct gc_inode_list *gc_list, int gc_type)
{
	struct page *sum_page;
	struct f2fs_summary_block *sum;
	struct blk_plug plug;
	int nfree = 0;

	/* read segment summary of victim */
	sum_page = get_sum_page(sbi, segno);

	blk_start_plug(&plug);

	sum = page_address(sum_page);

	/*
	 * this is to avoid deadlock:
	 * - lock_page(sum_page)         - f2fs_replace_block
	 *  - check_valid_map()            - mutex_lock(sentry_lock)
	 *   - mutex_lock(sentry_lock)     - change_curseg()
	 *                                  - lock_page(sum_page)
	 */
	unlock_page(sum_page);

	switch (GET_SUM_TYPE((&sum->footer))) {
	case SUM_TYPE_NODE:
		nfree = gc_node_segment(sbi, sum->entries, segno, gc_type);
		break;
	case SUM_TYPE_DATA:
		nfree = gc_data_segment(sbi, sum->entries, gc_list,
							segno, gc_type);
		break;
	}
	blk_finish_plug(&plug);

	stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type);
	stat_inc_call_count(sbi->stat_info);

	f2fs_put_page(sum_page, 0);
	return nfree;
}

int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
{
	unsigned int segno, i;
	int gc_type = sync ? FG_GC : BG_GC;
	int sec_freed = 0;
	int ret = -EINVAL;
	struct cp_control cpc;
	struct gc_inode_list gc_list = {
		.ilist = LIST_HEAD_INIT(gc_list.ilist),
		.iroot = RADIX_TREE_INIT(GFP_NOFS),
	};

	cpc.reason = __get_cp_reason(sbi);
gc_more:
	segno = NULL_SEGNO;

	if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
		goto stop;
	if (unlikely(f2fs_cp_error(sbi))) {
		ret = -EIO;
		goto stop;
	}

	if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
		gc_type = FG_GC;
		if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi))
			write_checkpoint(sbi, &cpc);
	}

	if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
		goto stop;
	ret = 0;

	/* read ahead multiple SSA blocks that have contiguous addresses */
	if (sbi->segs_per_sec > 1)
		ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
							META_SSA, true);

	for (i = 0; i < sbi->segs_per_sec; i++) {
		/*
		 * for FG_GC, stop GC-ing the remaining segments as soon as
		 * one segment in the selected section fails, to avoid
		 * long latency.
		 */
		if (!do_garbage_collect(sbi, segno + i, &gc_list, gc_type) &&
				gc_type == FG_GC)
			break;
	}

	if (i == sbi->segs_per_sec && gc_type == FG_GC)
		sec_freed++;

	if (gc_type == FG_GC)
		sbi->cur_victim_sec = NULL_SEGNO;

	if (!sync) {
		if (has_not_enough_free_secs(sbi, sec_freed))
			goto gc_more;

		if (gc_type == FG_GC)
			write_checkpoint(sbi, &cpc);
	}
stop:
	mutex_unlock(&sbi->gc_mutex);

	put_gc_inode(&gc_list);

	if (sync)
		ret = sec_freed ? 0 : -EAGAIN;
	return ret;
}

void build_gc_manager(struct f2fs_sb_info *sbi)
{
	DIRTY_I(sbi)->v_ops = &default_v_ops;
}
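/*
 * Wiring note (a sketch of the expected callers, see super.c):
 * build_gc_manager() is run while the superblock is being set up, so
 * that victim selection works even before the background thread
 * exists, while start_gc_thread()/stop_gc_thread() are driven by
 * mount, remount and umount depending on the BG_GC mount option.
 */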