/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
#include <linux/timer.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))

static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;

static unsigned long __reverse_ulong(unsigned char *str)
{
	unsigned long tmp = 0;
	int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
	shift = 56;
#endif
	while (shift >= 0) {
		tmp |= (unsigned long)str[idx++] << shift;
		shift -= BITS_PER_BYTE;
	}
	return tmp;
}

/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
	if ((word & 0xffffffff00000000UL) == 0)
		num += 32;
	else
		word >>= 32;
#endif
	if ((word & 0xffff0000) == 0)
		num += 16;
	else
		word >>= 16;

	if ((word & 0xff00) == 0)
		num += 8;
	else
		word >>= 8;

	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;

	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;

	if ((word & 0x2) == 0)
		num += 1;
	return num;
}

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = offset & ~(BITS_PER_LONG - 1);
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= result;
	offset %= BITS_PER_LONG;
	if (!offset)
		goto aligned;

	tmp = __reverse_ulong((unsigned char *)p);
	tmp &= ~0UL >> offset;

	if (size < BITS_PER_LONG)
		goto found_first;
	if (tmp)
		goto found_middle;

	size -= BITS_PER_LONG;
	result += BITS_PER_LONG;
	p++;
aligned:
	while (size & ~(BITS_PER_LONG - 1)) {
		tmp = __reverse_ulong((unsigned char *)p);
		if (tmp)
			goto found_middle;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
		p++;
	}
	if (!size)
		return result;

	tmp = __reverse_ulong((unsigned char *)p);
found_first:
	tmp &= (~0UL << (BITS_PER_LONG - size));
	if (!tmp)		/* Are any bits set? */
		return result + size;	/* Nope. */
found_middle:
	return result + __reverse_ffs(tmp);
}
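
/*
 * Worked example of the reversed-bit convention: f2fs_set_bit(2, bitmap)
 * stores 0x20 in byte 0; __reverse_ulong() then places that byte in the
 * MSB of the word, so __find_rev_next_bit(bitmap, size, 0) computes
 * __reverse_ffs(0x20 << 56) = 2, as expected. __find_rev_next_zero_bit()
 * below applies the same byte reversal to the inverted search.
 */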

static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = offset & ~(BITS_PER_LONG - 1);
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= result;
	offset %= BITS_PER_LONG;
	if (!offset)
		goto aligned;

	tmp = __reverse_ulong((unsigned char *)p);
	tmp |= ~((~0UL << offset) >> offset);

	if (size < BITS_PER_LONG)
		goto found_first;
	if (tmp != ~0UL)
		goto found_middle;

	size -= BITS_PER_LONG;
	result += BITS_PER_LONG;
	p++;
aligned:
	while (size & ~(BITS_PER_LONG - 1)) {
		tmp = __reverse_ulong((unsigned char *)p);
		if (tmp != ~0UL)
			goto found_middle;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
		p++;
	}
	if (!size)
		return result;

	tmp = __reverse_ulong((unsigned char *)p);
found_first:
	tmp |= ~(~0UL << (BITS_PER_LONG - size));
	if (tmp == ~0UL)	/* Are any bits zero? */
		return result + size;	/* Nope. */
found_middle:
	return result + __reverse_ffz(tmp);
}

void register_inmem_page(struct inode *inode, struct page *page)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *new;

	f2fs_trace_pid(page);

	set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
	SetPagePrivate(page);

	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);

	/* add atomic page indices to the list */
	new->page = page;
	INIT_LIST_HEAD(&new->list);

	/* increase reference count with clean state */
	mutex_lock(&fi->inmem_lock);
	get_page(page);
	list_add_tail(&new->list, &fi->inmem_pages);
	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	mutex_unlock(&fi->inmem_lock);

	trace_f2fs_register_inmem_page(page, INMEM);
}
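
/*
 * Each registered page stays pinned (get_page() above) until
 * commit_inmem_pages() either writes it back via do_write_data_page()
 * under f2fs_lock_op(), or simply drops it on the abort (eviction) path.
 */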

int commit_inmem_pages(struct inode *inode, bool abort)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *cur, *tmp;
	bool submit_bio = false;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = DATA,
		.rw = WRITE_SYNC | REQ_PRIO,
		.encrypted_page = NULL,
	};
	int err = 0;

	/*
	 * abort is true only when f2fs_evict_inode is called.
	 * Basically, f2fs_evict_inode doesn't produce any data writes, so
	 * we don't need to call f2fs_balance_fs.
	 * Otherwise, f2fs_gc in f2fs_balance_fs could wait forever until
	 * this inode is freed by iget_locked in f2fs_iget.
	 */
	if (!abort) {
		f2fs_balance_fs(sbi);
		f2fs_lock_op(sbi);
	}

	mutex_lock(&fi->inmem_lock);
	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
		lock_page(cur->page);
		if (!abort) {
			if (cur->page->mapping == inode->i_mapping) {
				set_page_dirty(cur->page);
				f2fs_wait_on_page_writeback(cur->page, DATA);
				if (clear_page_dirty_for_io(cur->page))
					inode_dec_dirty_pages(inode);
				trace_f2fs_commit_inmem_page(cur->page, INMEM);
				fio.page = cur->page;
				err = do_write_data_page(&fio);
				if (err) {
					unlock_page(cur->page);
					break;
				}
				clear_cold_data(cur->page);
				submit_bio = true;
			}
		} else {
			trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
		}
		set_page_private(cur->page, 0);
		ClearPagePrivate(cur->page);
		f2fs_put_page(cur->page, 1);

		list_del(&cur->list);
		kmem_cache_free(inmem_entry_slab, cur);
		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	}
	mutex_unlock(&fi->inmem_lock);

	if (!abort) {
		f2fs_unlock_op(sbi);
		if (submit_bio)
			f2fs_submit_merged_bio(sbi, DATA, WRITE);
	}
	return err;
}

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi)
{
	/*
	 * We should do GC or end up with checkpoint, if there are so many
	 * dirty dir/node pages without enough free segments.
	 */
	if (has_not_enough_free_secs(sbi, 0)) {
		mutex_lock(&sbi->gc_mutex);
		f2fs_gc(sbi, false);
	}
}

void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
	/* try to shrink the extent cache when there is not enough memory */
	if (!available_free_memory(sbi, EXTENT_CACHE))
		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

	/* check the # of cached NAT entries */
	if (!available_free_memory(sbi, NAT_ENTRIES))
		try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

	if (!available_free_memory(sbi, FREE_NIDS))
		try_to_free_nids(sbi, NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES);

	/* checkpoint is the only way to shrink partially cached entries */
	if (!available_free_memory(sbi, NAT_ENTRIES) ||
			excess_prefree_segs(sbi) ||
			!available_free_memory(sbi, INO_ENTRIES) ||
			jiffies > sbi->cp_expires)
		f2fs_sync_fs(sbi->sb, true);
}

static int issue_flush_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
	wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
	if (kthread_should_stop())
		return 0;

	if (!llist_empty(&fcc->issue_list)) {
		struct bio *bio;
		struct flush_cmd *cmd, *next;
		int ret;

		bio = f2fs_bio_alloc(0);

		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

		bio->bi_bdev = sbi->sb->s_bdev;
		ret = submit_bio_wait(WRITE_FLUSH, bio);

		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
			cmd->ret = ret;
			complete(&cmd->wait);
		}
		bio_put(bio);
		fcc->dispatch_list = NULL;
	}

	wait_event_interruptible(*q,
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
	goto repeat;
}
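
/*
 * Flush merging: concurrent callers push their flush_cmd onto the lockless
 * issue_list; the thread above drains the whole list, issues a single
 * WRITE_FLUSH bio on behalf of all of them, and completes every waiter
 * with the same return code.
 */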

int f2fs_issue_flush(struct f2fs_sb_info *sbi)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
	struct flush_cmd cmd;

	trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
					test_opt(sbi, FLUSH_MERGE));

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE)) {
		struct bio *bio = f2fs_bio_alloc(0);
		int ret;

		bio->bi_bdev = sbi->sb->s_bdev;
		ret = submit_bio_wait(WRITE_FLUSH, bio);
		bio_put(bio);
		return ret;
	}

	init_completion(&cmd.wait);

	llist_add(&cmd.llnode, &fcc->issue_list);

	if (!fcc->dispatch_list)
		wake_up(&fcc->flush_wait_queue);

	wait_for_completion(&cmd.wait);

	return cmd.ret;
}

int create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;
	int err = 0;

	fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
	if (!fcc)
		return -ENOMEM;
	init_waitqueue_head(&fcc->flush_wait_queue);
	init_llist_head(&fcc->issue_list);
	SM_I(sbi)->cmd_control_info = fcc;
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		err = PTR_ERR(fcc->f2fs_issue_flush);
		kfree(fcc);
		SM_I(sbi)->cmd_control_info = NULL;
		return err;
	}

	return err;
}

void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;

	if (fcc && fcc->f2fs_issue_flush)
		kthread_stop(fcc->f2fs_issue_flush);
	kfree(fcc);
	SM_I(sbi)->cmd_control_info = NULL;
}

static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;
	}
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;

		if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
			clear_bit(GET_SECNO(sbi, segno),
						dirty_i->victim_secmap);
	}
}
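
/*
 * A DIRTY segment is tracked twice: once in dirty_segmap[DIRTY] and once
 * in the per-temperature map dirty_segmap[t] keyed by the segment's type,
 * which lets SSR pick a victim segment of a matching temperature.
 */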

/*
 * This should not fail with an error such as -ENOMEM; adding a dirty
 * entry to the seglist is not a critical operation.
 * If a given segment is one of the current working segments, it won't be
 * added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	mutex_lock(&dirty_i->seglist_lock);

	valid_blocks = get_valid_blocks(sbi, segno, 0);

	if (valid_blocks == 0) {
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < sbi->blocks_per_seg) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}

static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
				block_t blkstart, block_t blklen)
{
	sector_t start = SECTOR_FROM_BLOCK(blkstart);
	sector_t len = SECTOR_FROM_BLOCK(blklen);
	struct seg_entry *se;
	unsigned int offset;
	block_t i;

	for (i = blkstart; i < blkstart + blklen; i++) {
		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
		offset = GET_BLKOFF_FROM_SEG0(sbi, i);

		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	}
	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
	return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
}

bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int err = -ENOTSUPP;

	if (test_opt(sbi, DISCARD)) {
		struct seg_entry *se = get_seg_entry(sbi,
				GET_SEGNO(sbi, blkaddr));
		unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

		if (f2fs_test_bit(offset, se->discard_map))
			return false;

		err = f2fs_issue_discard(sbi, blkaddr, 1);
	}

	if (err) {
		update_meta_page(sbi, NULL, blkaddr);
		return true;
	}
	return false;
}

static void __add_discard_entry(struct f2fs_sb_info *sbi,
		struct cp_control *cpc, struct seg_entry *se,
		unsigned int start, unsigned int end)
{
	struct list_head *head = &SM_I(sbi)->discard_list;
	struct discard_entry *new, *last;

	if (!list_empty(head)) {
		last = list_last_entry(head, struct discard_entry, list);
		if (START_BLOCK(sbi, cpc->trim_start) + start ==
						last->blkaddr + last->len) {
			last->len += end - start;
			goto done;
		}
	}

	new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
	INIT_LIST_HEAD(&new->list);
	new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
	new->len = end - start;
	list_add_tail(&new->list, head);
done:
	SM_I(sbi)->nr_discards += end - start;
}
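
/*
 * Candidate ranges are coalesced on the fly: when a new range starts
 * exactly where the last queued discard_entry ends, the two are merged
 * by extending last->len instead of allocating another entry.
 */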

static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	int max_blocks = sbi->blocks_per_seg;
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *discard_map = (unsigned long *)se->discard_map;
	unsigned long *dmap = SIT_I(sbi)->tmp_map;
	unsigned int start = 0, end = -1;
	bool force = (cpc->reason == CP_DISCARD);
	int i;

	if (se->valid_blocks == max_blocks)
		return;

	if (!force) {
		if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
		    SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)
			return;
	}

	/* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
	for (i = 0; i < entries; i++)
		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];

	while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
		if (start >= max_blocks)
			break;

		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
		__add_discard_entry(sbi, cpc, se, start, end);
	}
}

void release_discard_addrs(struct f2fs_sb_info *sbi)
{
	struct list_head *head = &(SM_I(sbi)->discard_list);
	struct discard_entry *entry, *this;

	/* drop caches */
	list_for_each_entry_safe(entry, this, head, list) {
		list_del(&entry->list);
		kmem_cache_free(discard_entry_slab, entry);
	}
}

/*
 * Should call clear_prefree_segments after checkpoint is done.
 */
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
		__set_test_and_free(sbi, segno);
	mutex_unlock(&dirty_i->seglist_lock);
}

void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct list_head *head = &(SM_I(sbi)->discard_list);
	struct discard_entry *entry, *this;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
	unsigned int start = 0, end = -1;

	mutex_lock(&dirty_i->seglist_lock);

	while (1) {
		int i;
		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
		if (start >= MAIN_SEGS(sbi))
			break;
		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
								start + 1);

		for (i = start; i < end; i++)
			clear_bit(i, prefree_map);

		dirty_i->nr_dirty[PRE] -= end - start;

		if (!test_opt(sbi, DISCARD))
			continue;

		f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
				(end - start) << sbi->log_blocks_per_seg);
	}
	mutex_unlock(&dirty_i->seglist_lock);

	/* send small discards */
	list_for_each_entry_safe(entry, this, head, list) {
		if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen)
			goto skip;
		f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
		cpc->trimmed += entry->len;
skip:
		list_del(&entry->list);
		SM_I(sbi)->nr_discards -= entry->len;
		kmem_cache_free(discard_entry_slab, entry);
	}
}

static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);

	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
		sit_i->dirty_sentries++;
		return false;
	}

	return true;
}

static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
					unsigned int segno, int modified)
{
	struct seg_entry *se = get_seg_entry(sbi, segno);

	se->type = type;
	if (modified)
		__mark_sit_entry_dirty(sbi, segno);
}
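
/*
 * In update_sit_entry() below, del is +1 when a block becomes valid and
 * -1 when it is invalidated; the same delta is propagated to the
 * written_valid_blocks total and, when segs_per_sec > 1, to the per-section
 * counters used for section-granularity victim selection.
 */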

static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
	struct seg_entry *se;
	unsigned int segno, offset;
	long int new_vblocks;

	segno = GET_SEGNO(sbi, blkaddr);

	se = get_seg_entry(sbi, segno);
	new_vblocks = se->valid_blocks + del;
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
				(new_vblocks > sbi->blocks_per_seg)));

	se->valid_blocks = new_vblocks;
	se->mtime = get_mtime(sbi);
	SIT_I(sbi)->max_mtime = se->mtime;

	/* Update valid block bitmap */
	if (del > 0) {
		if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
			f2fs_bug_on(sbi, 1);
		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	} else {
		if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
			f2fs_bug_on(sbi, 1);
		if (f2fs_test_and_clear_bit(offset, se->discard_map))
			sbi->discard_blks++;
	}
	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
		se->ckpt_valid_blocks += del;

	__mark_sit_entry_dirty(sbi, segno);

	/* update total number of valid blocks to be written in ckpt area */
	SIT_I(sbi)->written_valid_blocks += del;

	if (sbi->segs_per_sec > 1)
		get_sec_entry(sbi, segno)->valid_blocks += del;
}

void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
{
	update_sit_entry(sbi, new, 1);
	if (GET_SEGNO(sbi, old) != NULL_SEGNO)
		update_sit_entry(sbi, old, -1);

	locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
}

void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
	unsigned int segno = GET_SEGNO(sbi, addr);
	struct sit_info *sit_i = SIT_I(sbi);

	f2fs_bug_on(sbi, addr == NULL_ADDR);
	if (addr == NEW_ADDR)
		return;

	/* add it into sit main buffer */
	mutex_lock(&sit_i->sentry_lock);

	update_sit_entry(sbi, addr, -1);

	/* add it into dirty seglist */
	locate_dirty_segment(sbi, segno);

	mutex_unlock(&sit_i->sentry_lock);
}

bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno, offset;
	struct seg_entry *se;
	bool is_cp = false;

	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
		return true;

	mutex_lock(&sit_i->sentry_lock);

	segno = GET_SEGNO(sbi, blkaddr);
	se = get_seg_entry(sbi, segno);
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	if (f2fs_test_bit(offset, se->ckpt_valid_map))
		is_cp = true;

	mutex_unlock(&sit_i->sentry_lock);

	return is_cp;
}

/*
 * This function must be called with curseg_mutex held.
 */
static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
					struct f2fs_summary *sum)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	void *addr = curseg->sum_blk;

	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
	memcpy(addr, sum, sizeof(struct f2fs_summary));
}

/*
 * Calculate the number of current summary pages for writing
 */
int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
{
	int valid_sum_count = 0;
	int i, sum_in_page;

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		if (sbi->ckpt->alloc_type[i] == SSR)
			valid_sum_count += sbi->blocks_per_seg;
		else {
			if (for_ra)
				valid_sum_count += le16_to_cpu(
					F2FS_CKPT(sbi)->cur_data_blkoff[i]);
			else
				valid_sum_count += curseg_blkoff(sbi, i);
		}
	}

	sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
	if (valid_sum_count <= sum_in_page)
		return 1;
	else if ((valid_sum_count - sum_in_page) <=
		(PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
		return 2;
	return 3;
}
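
/*
 * Rough numbers, assuming a 4KB page and the usual on-disk constants
 * (SUMMARY_SIZE = 7, SUM_JOURNAL_SIZE = 253, SUM_FOOTER_SIZE = 5): the
 * first compacted page holds both journals plus (4096 - 2*253 - 5) / 7
 * = 512 summary entries, and each following page holds (4096 - 5) / 7
 * = 584 entries, which is where the 1/2/3 page result above comes from.
 */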

/*
 * Caller should put this summary page
 */
struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
}

void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
{
	struct page *page = grab_meta_page(sbi, blk_addr);
	void *dst = page_address(page);

	if (src)
		memcpy(dst, src, PAGE_CACHE_SIZE);
	else
		memset(dst, 0, PAGE_CACHE_SIZE);
	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

static void write_sum_page(struct f2fs_sb_info *sbi,
			struct f2fs_summary_block *sum_blk, block_t blk_addr)
{
	update_meta_page(sbi, (void *)sum_blk, blk_addr);
}

static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno + 1;
	struct free_segmap_info *free_i = FREE_I(sbi);

	if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
		return !test_bit(segno, free_i->free_segmap);
	return 0;
}

/*
 * Find a new segment in the free segment bitmap, in the requested order.
 * This function must succeed; otherwise it is a BUG.
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
	unsigned int hint = *newseg / sbi->segs_per_sec;
	unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
	unsigned int left_start = hint;
	bool init = true;
	int go_left = 0;
	int i;

	spin_lock(&free_i->segmap_lock);

	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
					MAIN_SEGS(sbi), *newseg + 1);
		if (segno - *newseg < sbi->segs_per_sec -
					(*newseg % sbi->segs_per_sec))
			goto got_it;
	}
find_other_zone:
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
	if (secno >= MAIN_SECS(sbi)) {
		if (dir == ALLOC_RIGHT) {
			secno = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
		} else {
			go_left = 1;
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		left_start = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
		break;
	}
	secno = left_start;
skip_left:
	hint = secno;
	segno = secno * sbi->segs_per_sec;
	zoneno = secno / sbi->secs_per_zone;

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in use, try another */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
	__set_inuse(sbi, segno);
	*newseg = segno;
	spin_unlock(&free_i->segmap_lock);
}
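
/*
 * Allocation order above: first try the next free segment inside the
 * current section, then scan for a free section starting from the hint,
 * walking left or right as requested, and finally retry once in a
 * different zone if the chosen zone already hosts another current segment.
 */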

static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct summary_footer *sum_footer;

	curseg->segno = curseg->next_segno;
	curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
	curseg->next_blkoff = 0;
	curseg->next_segno = NULL_SEGNO;

	sum_footer = &(curseg->sum_blk->footer);
	memset(sum_footer, 0, sizeof(struct summary_footer));
	if (IS_DATASEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
	if (IS_NODESEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
	__set_sit_entry_type(sbi, type, curseg->segno, modified);
}

/*
 * Allocate a current working segment.
 * This function always allocates a free segment in LFS manner.
 */
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno;
	int dir = ALLOC_LEFT;

	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, segno));
	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
		dir = ALLOC_RIGHT;

	if (test_opt(sbi, NOHEAP))
		dir = ALLOC_RIGHT;

	get_new_segment(sbi, &segno, new_sec, dir);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 1);
	curseg->alloc_type = LFS;
}

static void __next_free_blkoff(struct f2fs_sb_info *sbi,
			struct curseg_info *seg, block_t start)
{
	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	unsigned long *target_map = SIT_I(sbi)->tmp_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	int i, pos;

	for (i = 0; i < entries; i++)
		target_map[i] = ckpt_map[i] | cur_map[i];

	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);

	seg->next_blkoff = pos;
}
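
/*
 * Note that __next_free_blkoff() ORs the checkpointed and current bitmaps,
 * so SSR only hands out block offsets that are free in both views; blocks
 * still valid in the last checkpoint are never overwritten in place.
 */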

/*
 * If a segment is written in LFS manner, the next block offset is simply
 * obtained by increasing the current block offset. However, if a segment
 * is written in SSR manner, the next block offset is obtained by calling
 * __next_free_blkoff.
 */
static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
				struct curseg_info *seg)
{
	if (seg->alloc_type == SSR)
		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
	else
		seg->next_blkoff++;
}

/*
 * This function always allocates a used segment (from the dirty seglist)
 * in SSR manner, so it needs to recover the existing segment information
 * of valid blocks.
 */
static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int new_segno = curseg->next_segno;
	struct f2fs_summary_block *sum_node;
	struct page *sum_page;

	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, curseg->segno));
	__set_test_and_inuse(sbi, new_segno);

	mutex_lock(&dirty_i->seglist_lock);
	__remove_dirty_segment(sbi, new_segno, PRE);
	__remove_dirty_segment(sbi, new_segno, DIRTY);
	mutex_unlock(&dirty_i->seglist_lock);

	reset_curseg(sbi, type, 1);
	curseg->alloc_type = SSR;
	__next_free_blkoff(sbi, curseg, 0);

	if (reuse) {
		sum_page = get_sum_page(sbi, new_segno);
		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
		f2fs_put_page(sum_page, 1);
	}
}

static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;

	if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
		return v_ops->get_victim(sbi,
				&(curseg)->next_segno, BG_GC, type, SSR);

	/* For data segments, let's do SSR more intensively */
	for (; type >= CURSEG_HOT_DATA; type--)
		if (v_ops->get_victim(sbi, &(curseg)->next_segno,
						BG_GC, type, SSR))
			return 1;
	return 0;
}

/*
 * Flush out the current segment and replace it with a new segment.
 * This function must succeed; otherwise it is a BUG.
 */
static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
						int type, bool force)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	if (force)
		new_curseg(sbi, type, true);
	else if (type == CURSEG_WARM_NODE)
		new_curseg(sbi, type, false);
	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
		new_curseg(sbi, type, false);
	else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
		change_curseg(sbi, type, true);
	else
		new_curseg(sbi, type, false);

	stat_inc_seg_type(sbi, curseg);
}

static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int old_segno;

	old_segno = curseg->segno;
	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
	locate_dirty_segment(sbi, old_segno);
}

void allocate_new_segments(struct f2fs_sb_info *sbi)
{
	int i;

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
		__allocate_new_segments(sbi, i);
}

static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};
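
/*
 * f2fs_trim_fs() below walks the requested range in batches of
 * BATCHED_TRIM_SEGMENTS and issues one checkpoint per batch, so discards
 * are sent safely (only for blocks freed by the checkpoint) without
 * holding gc_mutex for the whole trim.
 */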

int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
	__u64 start = F2FS_BYTES_TO_BLK(range->start);
	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
	unsigned int start_segno, end_segno;
	struct cp_control cpc;

	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
		return -EINVAL;

	cpc.trimmed = 0;
	if (end <= MAIN_BLKADDR(sbi))
		goto out;

	/* start/end segment number in main_area */
	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
						GET_SEGNO(sbi, end);
	cpc.reason = CP_DISCARD;
	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));

	/* do checkpoint to issue discard commands safely */
	for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
		cpc.trim_start = start_segno;

		if (sbi->discard_blks == 0)
			break;
		else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
			cpc.trim_end = end_segno;
		else
			cpc.trim_end = min_t(unsigned int,
				rounddown(start_segno +
				BATCHED_TRIM_SEGMENTS(sbi),
				sbi->segs_per_sec) - 1, end_segno);

		mutex_lock(&sbi->gc_mutex);
		write_checkpoint(sbi, &cpc);
		mutex_unlock(&sbi->gc_mutex);
	}
out:
	range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
	return 0;
}

static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	if (curseg->next_blkoff < sbi->blocks_per_seg)
		return true;
	return false;
}

static int __get_segment_type_2(struct page *page, enum page_type p_type)
{
	if (p_type == DATA)
		return CURSEG_HOT_DATA;
	else
		return CURSEG_HOT_NODE;
}

static int __get_segment_type_4(struct page *page, enum page_type p_type)
{
	if (p_type == DATA) {
		struct inode *inode = page->mapping->host;

		if (S_ISDIR(inode->i_mode))
			return CURSEG_HOT_DATA;
		else
			return CURSEG_COLD_DATA;
	} else {
		if (IS_DNODE(page) && is_cold_node(page))
			return CURSEG_WARM_NODE;
		else
			return CURSEG_COLD_NODE;
	}
}

static int __get_segment_type_6(struct page *page, enum page_type p_type)
{
	if (p_type == DATA) {
		struct inode *inode = page->mapping->host;

		if (S_ISDIR(inode->i_mode))
			return CURSEG_HOT_DATA;
		else if (is_cold_data(page) || file_is_cold(inode))
			return CURSEG_COLD_DATA;
		else
			return CURSEG_WARM_DATA;
	} else {
		if (IS_DNODE(page))
			return is_cold_node(page) ? CURSEG_WARM_NODE :
						CURSEG_HOT_NODE;
		else
			return CURSEG_COLD_NODE;
	}
}

static int __get_segment_type(struct page *page, enum page_type p_type)
{
	switch (F2FS_P_SB(page)->active_logs) {
	case 2:
		return __get_segment_type_2(page, p_type);
	case 4:
		return __get_segment_type_4(page, p_type);
	}
	/* NR_CURSEG_TYPE(6) logs by default */
	f2fs_bug_on(F2FS_P_SB(page),
		F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
	return __get_segment_type_6(page, p_type);
}
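
/*
 * With six active logs, the mapping above is: directory data -> hot data,
 * cold-marked data or data of cold files -> cold data, other data -> warm
 * data; direct nodes of cold files -> warm node, other direct nodes ->
 * hot node, and indirect nodes -> cold node.
 */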

void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
		block_t old_blkaddr, block_t *new_blkaddr,
		struct f2fs_summary *sum, int type)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	bool direct_io = (type == CURSEG_DIRECT_IO);

	type = direct_io ? CURSEG_WARM_DATA : type;

	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	/* direct_io'ed data is aligned to the segment for better performance */
	if (direct_io && curseg->next_blkoff &&
				!has_not_enough_free_secs(sbi, 0))
		__allocate_new_segments(sbi, type);

	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	/*
	 * __add_sum_entry must be called under curseg_mutex because it
	 * updates a summary entry in the current summary block.
	 */
	__add_sum_entry(sbi, type, sum);

	__refresh_next_blkoff(sbi, curseg);

	stat_inc_block_count(sbi, curseg);

	if (!__has_curseg_space(sbi, type))
		sit_i->s_ops->allocate_segment(sbi, type, false);
	/*
	 * SIT information should be updated before segment allocation,
	 * since SSR needs latest valid block information.
	 */
	refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);

	mutex_unlock(&sit_i->sentry_lock);

	if (page && IS_NODESEG(type))
		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));

	mutex_unlock(&curseg->curseg_mutex);
}

static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
	int type = __get_segment_type(fio->page, fio->type);

	allocate_data_block(fio->sbi, fio->page, fio->blk_addr,
					&fio->blk_addr, sum, type);

	/* writeout dirty page into bdev */
	f2fs_submit_page_mbio(fio);
}

void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
{
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.rw = WRITE_SYNC | REQ_META | REQ_PRIO,
		.blk_addr = page->index,
		.page = page,
		.encrypted_page = NULL,
	};

	if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
		fio.rw &= ~REQ_META;

	set_page_writeback(page);
	f2fs_submit_page_mbio(&fio);
}

void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
{
	struct f2fs_summary sum;

	set_summary(&sum, nid, 0, 0);
	do_write_page(&sum, fio);
}

void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	struct f2fs_summary sum;
	struct node_info ni;

	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
	get_node_info(sbi, dn->nid, &ni);
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
	do_write_page(&sum, fio);
	dn->data_blkaddr = fio->blk_addr;
}

void rewrite_data_page(struct f2fs_io_info *fio)
{
	stat_inc_inplace_blocks(fio->sbi);
	f2fs_submit_page_mbio(fio);
}
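
/*
 * rewrite_data_page() is the in-place write path: fio->blk_addr is reused
 * as-is, whereas do_write_page() above always allocates a new block
 * (out-of-place, log-structured) and records a summary entry for it.
 */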

static void __f2fs_replace_block(struct f2fs_sb_info *sbi,
				struct f2fs_summary *sum,
				block_t old_blkaddr, block_t new_blkaddr,
				bool recover_curseg)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int segno, old_cursegno;
	struct seg_entry *se;
	int type;
	unsigned short old_blkoff;

	segno = GET_SEGNO(sbi, new_blkaddr);
	se = get_seg_entry(sbi, segno);
	type = se->type;

	if (!recover_curseg) {
		/* for recovery flow */
		if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
			if (old_blkaddr == NULL_ADDR)
				type = CURSEG_COLD_DATA;
			else
				type = CURSEG_WARM_DATA;
		}
	} else {
		if (!IS_CURSEG(sbi, segno))
			type = CURSEG_WARM_DATA;
	}

	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	old_cursegno = curseg->segno;
	old_blkoff = curseg->next_blkoff;

	/* change the current segment */
	if (segno != curseg->segno) {
		curseg->next_segno = segno;
		change_curseg(sbi, type, true);
	}

	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
	__add_sum_entry(sbi, type, sum);

	if (!recover_curseg)
		update_sit_entry(sbi, new_blkaddr, 1);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		update_sit_entry(sbi, old_blkaddr, -1);

	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));

	locate_dirty_segment(sbi, old_cursegno);

	if (recover_curseg) {
		if (old_cursegno != curseg->segno) {
			curseg->next_segno = old_cursegno;
			change_curseg(sbi, type, true);
		}
		curseg->next_blkoff = old_blkoff;
	}

	mutex_unlock(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);
}

void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
				block_t old_addr, block_t new_addr,
				unsigned char version, bool recover_curseg)
{
	struct f2fs_summary sum;

	set_summary(&sum, dn->nid, dn->ofs_in_node, version);

	__f2fs_replace_block(sbi, &sum, old_addr, new_addr, recover_curseg);

	dn->data_blkaddr = new_addr;
	set_data_blkaddr(dn);
	f2fs_update_extent_cache(dn);
}

static inline bool is_merged_page(struct f2fs_sb_info *sbi,
					struct page *page, enum page_type type)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = &sbi->write_io[btype];
	struct bio_vec *bvec;
	struct page *target;
	int i;

	down_read(&io->io_rwsem);
	if (!io->bio) {
		up_read(&io->io_rwsem);
		return false;
	}

	bio_for_each_segment_all(bvec, io->bio, i) {

		if (bvec->bv_page->mapping) {
			target = bvec->bv_page;
		} else {
			struct f2fs_crypto_ctx *ctx;

			/* encrypted page */
			ctx = (struct f2fs_crypto_ctx *)page_private(
								bvec->bv_page);
			target = ctx->w.control_page;
		}

		if (page == target) {
			up_read(&io->io_rwsem);
			return true;
		}
	}

	up_read(&io->io_rwsem);
	return false;
}

void f2fs_wait_on_page_writeback(struct page *page,
				enum page_type type)
{
	if (PageWriteback(page)) {
		struct f2fs_sb_info *sbi = F2FS_P_SB(page);

		if (is_merged_page(sbi, page, type))
			f2fs_submit_merged_bio(sbi, type, WRITE);
		wait_on_page_writeback(page);
	}
}

void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
							block_t blkaddr)
{
	struct page *cpage;

	if (blkaddr == NEW_ADDR)
		return;

	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);

	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
	if (cpage) {
		f2fs_wait_on_page_writeback(cpage, DATA);
		f2fs_put_page(cpage, 1);
	}
}
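
/*
 * The compacted summary layout read below packs, into the first page:
 * the NAT journal, then the SIT journal, then the hot/warm/cold data
 * summary entries back to back; overflow entries continue on the next
 * meta page(s).
 */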

static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *seg_i;
	unsigned char *kaddr;
	struct page *page;
	block_t start;
	int i, j, offset;

	start = start_sum_block(sbi);

	page = get_meta_page(sbi, start++);
	kaddr = (unsigned char *)page_address(page);

	/* Step 1: restore nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
	memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);

	/* Step 2: restore sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
	memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
						SUM_JOURNAL_SIZE);
	offset = 2 * SUM_JOURNAL_SIZE;

	/* Step 3: restore summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blk_off;
		unsigned int segno;

		seg_i = CURSEG_I(sbi, i);
		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
		seg_i->next_segno = segno;
		reset_curseg(sbi, i, 0);
		seg_i->alloc_type = ckpt->alloc_type[i];
		seg_i->next_blkoff = blk_off;

		if (seg_i->alloc_type == SSR)
			blk_off = sbi->blocks_per_seg;

		for (j = 0; j < blk_off; j++) {
			struct f2fs_summary *s;

			s = (struct f2fs_summary *)(kaddr + offset);
			seg_i->sum_blk->entries[j] = *s;
			offset += SUMMARY_SIZE;
			if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
						SUM_FOOTER_SIZE)
				continue;

			f2fs_put_page(page, 1);
			page = NULL;

			page = get_meta_page(sbi, start++);
			kaddr = (unsigned char *)page_address(page);
			offset = 0;
		}
	}
	f2fs_put_page(page, 1);
	return 0;
}

static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_summary_block *sum;
	struct curseg_info *curseg;
	struct page *new;
	unsigned short blk_off;
	unsigned int segno = 0;
	block_t blk_addr = 0;

	/* get segment number and block addr */
	if (IS_DATASEG(type)) {
		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
							CURSEG_HOT_DATA]);
		if (__exist_node_summaries(sbi))
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
		else
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
	} else {
		segno = le32_to_cpu(ckpt->cur_node_segno[type -
							CURSEG_HOT_NODE]);
		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
							CURSEG_HOT_NODE]);
		if (__exist_node_summaries(sbi))
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
							type - CURSEG_HOT_NODE);
		else
			blk_addr = GET_SUM_BLOCK(sbi, segno);
	}

	new = get_meta_page(sbi, blk_addr);
	sum = (struct f2fs_summary_block *)page_address(new);

	if (IS_NODESEG(type)) {
		if (__exist_node_summaries(sbi)) {
			struct f2fs_summary *ns = &sum->entries[0];
			int i;

			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
				ns->version = 0;
				ns->ofs_in_node = 0;
			}
		} else {
			int err;

			err = restore_node_summary(sbi, segno, sum);
			if (err) {
				f2fs_put_page(new, 1);
				return err;
			}
		}
	}

	/* set uncompleted segment to curseg */
	curseg = CURSEG_I(sbi, type);
	mutex_lock(&curseg->curseg_mutex);
	memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 0);
	curseg->alloc_type = ckpt->alloc_type[type];
	curseg->next_blkoff = blk_off;
	mutex_unlock(&curseg->curseg_mutex);
	f2fs_put_page(new, 1);
	return 0;
}
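
/*
 * Restore order in restore_curseg_summaries() below: if
 * CP_COMPACT_SUM_FLAG is set, the three data logs come from the compacted
 * pages and only the node logs are read as normal summary blocks;
 * otherwise all six logs go through read_normal_summaries().
 */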

static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
	int type = CURSEG_HOT_DATA;
	int err;

	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
		int npages = npages_for_summary_flush(sbi, true);

		if (npages >= 2)
			ra_meta_pages(sbi, start_sum_block(sbi), npages,
							META_CP, true);

		/* restore for compacted data summary */
		if (read_compacted_summaries(sbi))
			return -EINVAL;
		type = CURSEG_HOT_NODE;
	}

	if (__exist_node_summaries(sbi))
		ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
					NR_CURSEG_TYPE - type, META_CP, true);

	for (; type <= CURSEG_COLD_NODE; type++) {
		err = read_normal_summaries(sbi, type);
		if (err)
			return err;
	}

	return 0;
}

static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct page *page;
	unsigned char *kaddr;
	struct f2fs_summary *summary;
	struct curseg_info *seg_i;
	int written_size = 0;
	int i, j;

	page = grab_meta_page(sbi, blkaddr++);
	kaddr = (unsigned char *)page_address(page);

	/* Step 1: write nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
	memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
	written_size += SUM_JOURNAL_SIZE;

	/* Step 2: write sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
	memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
						SUM_JOURNAL_SIZE);
	written_size += SUM_JOURNAL_SIZE;

	/* Step 3: write summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blkoff;

		seg_i = CURSEG_I(sbi, i);
		if (sbi->ckpt->alloc_type[i] == SSR)
			blkoff = sbi->blocks_per_seg;
		else
			blkoff = curseg_blkoff(sbi, i);

		for (j = 0; j < blkoff; j++) {
			if (!page) {
				page = grab_meta_page(sbi, blkaddr++);
				kaddr = (unsigned char *)page_address(page);
				written_size = 0;
			}
			summary = (struct f2fs_summary *)(kaddr + written_size);
			*summary = seg_i->sum_blk->entries[j];
			written_size += SUMMARY_SIZE;

			if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
							SUM_FOOTER_SIZE)
				continue;

			set_page_dirty(page);
			f2fs_put_page(page, 1);
			page = NULL;
		}
	}
	if (page) {
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}
}

static void write_normal_summaries(struct f2fs_sb_info *sbi,
					block_t blkaddr, int type)
{
	int i, end;

	if (IS_DATASEG(type))
		end = type + NR_CURSEG_DATA_TYPE;
	else
		end = type + NR_CURSEG_NODE_TYPE;

	for (i = type; i < end; i++) {
		struct curseg_info *sum = CURSEG_I(sbi, i);

		mutex_lock(&sum->curseg_mutex);
		write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
		mutex_unlock(&sum->curseg_mutex);
	}
}

void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
		write_compacted_summaries(sbi, start_blk);
	else
		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
}

void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
	write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
}
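
/*
 * lookup_journal_in_cursum() below returns the journal slot index holding
 * the given nid/segno; when alloc is set and the journal has room, it
 * instead returns the index of a freshly reserved slot. -1 means not
 * found and no space left.
 */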

int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
					unsigned int val, int alloc)
{
	int i;

	if (type == NAT_JOURNAL) {
		for (i = 0; i < nats_in_cursum(sum); i++) {
			if (le32_to_cpu(nid_in_journal(sum, i)) == val)
				return i;
		}
		if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
			return update_nats_in_cursum(sum, 1);
	} else if (type == SIT_JOURNAL) {
		for (i = 0; i < sits_in_cursum(sum); i++)
			if (le32_to_cpu(segno_in_journal(sum, i)) == val)
				return i;
		if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
			return update_sits_in_cursum(sum, 1);
	}
	return -1;
}

static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
					unsigned int segno)
{
	return get_meta_page(sbi, current_sit_addr(sbi, segno));
}

static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
					unsigned int start)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct page *src_page, *dst_page;
	pgoff_t src_off, dst_off;
	void *src_addr, *dst_addr;

	src_off = current_sit_addr(sbi, start);
	dst_off = next_sit_addr(sbi, src_off);

	/* get current sit block page without lock */
	src_page = get_meta_page(sbi, src_off);
	dst_page = grab_meta_page(sbi, dst_off);
	f2fs_bug_on(sbi, PageDirty(src_page));

	src_addr = page_address(src_page);
	dst_addr = page_address(dst_page);
	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);

	set_page_dirty(dst_page);
	f2fs_put_page(src_page, 1);

	set_to_next_sit(sit_i, start);

	return dst_page;
}

static struct sit_entry_set *grab_sit_entry_set(void)
{
	struct sit_entry_set *ses =
			f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);

	ses->entry_cnt = 0;
	INIT_LIST_HEAD(&ses->set_list);
	return ses;
}

static void release_sit_entry_set(struct sit_entry_set *ses)
{
	list_del(&ses->set_list);
	kmem_cache_free(sit_entry_set_slab, ses);
}

static void adjust_sit_entry_set(struct sit_entry_set *ses,
						struct list_head *head)
{
	struct sit_entry_set *next = ses;

	if (list_is_last(&ses->set_list, head))
		return;

	list_for_each_entry_continue(next, head, set_list)
		if (ses->entry_cnt <= next->entry_cnt)
			break;

	list_move_tail(&ses->set_list, &next->set_list);
}

static void add_sit_entry(unsigned int segno, struct list_head *head)
{
	struct sit_entry_set *ses;
	unsigned int start_segno = START_SEGNO(segno);

	list_for_each_entry(ses, head, set_list) {
		if (ses->start_segno == start_segno) {
			ses->entry_cnt++;
			adjust_sit_entry_set(ses, head);
			return;
		}
	}

	ses = grab_sit_entry_set();

	ses->start_segno = start_segno;
	ses->entry_cnt++;
	list_add(&ses->set_list, head);
}

static void add_sits_in_set(struct f2fs_sb_info *sbi)
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);
	struct list_head *set_list = &sm_info->sit_entry_set;
	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
	unsigned int segno;

	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
		add_sit_entry(segno, set_list);
}
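
/*
 * The set list is kept sorted by ascending entry_cnt, so that in
 * flush_sit_entries() the smallest sets are tried against the in-summary
 * SIT journal first, and larger sets fall back to whole SIT pages.
 */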

static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	int i;

	for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
		unsigned int segno;
		bool dirtied;

		segno = le32_to_cpu(segno_in_journal(sum, i));
		dirtied = __mark_sit_entry_dirty(sbi, segno);

		if (!dirtied)
			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
	}
	update_sits_in_cursum(sum, -sits_in_cursum(sum));
}

/*
 * CP calls this function, which flushes SIT entries including sit_journal,
 * and moves prefree segs to free segs.
 */
void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	struct sit_entry_set *ses, *tmp;
	struct list_head *head = &SM_I(sbi)->sit_entry_set;
	bool to_journal = true;
	struct seg_entry *se;

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	if (!sit_i->dirty_sentries)
		goto out;

	/*
	 * add and account sit entries of dirty bitmap in sit entry
	 * set temporarily
	 */
	add_sits_in_set(sbi);

	/*
	 * if there is not enough space in the journal to store dirty sit
	 * entries, remove all entries from the journal and add and account
	 * them in the sit entry set.
	 */
	if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
		remove_sits_in_journal(sbi);

	/*
	 * there are two steps to flush sit entries:
	 * #1, flush sit entries to journal in current cold data summary block.
	 * #2, flush sit entries to sit page.
	 */
	list_for_each_entry_safe(ses, tmp, head, set_list) {
		struct page *page = NULL;
		struct f2fs_sit_block *raw_sit = NULL;
		unsigned int start_segno = ses->start_segno;
		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
						(unsigned long)MAIN_SEGS(sbi));
		unsigned int segno = start_segno;

		if (to_journal &&
			!__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
			to_journal = false;

		if (!to_journal) {
			page = get_next_sit_page(sbi, start_segno);
			raw_sit = page_address(page);
		}

		/* flush dirty sit entries in region of current sit set */
		for_each_set_bit_from(segno, bitmap, end) {
			int offset, sit_offset;

			se = get_seg_entry(sbi, segno);

			/* add discard candidates */
			if (cpc->reason != CP_DISCARD) {
				cpc->trim_start = segno;
				add_discard_addrs(sbi, cpc);
			}

			if (to_journal) {
				offset = lookup_journal_in_cursum(sum,
							SIT_JOURNAL, segno, 1);
				f2fs_bug_on(sbi, offset < 0);
				segno_in_journal(sum, offset) =
							cpu_to_le32(segno);
				seg_info_to_raw_sit(se,
						&sit_in_journal(sum, offset));
			} else {
				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
				seg_info_to_raw_sit(se,
						&raw_sit->entries[sit_offset]);
			}

			__clear_bit(segno, bitmap);
			sit_i->dirty_sentries--;
			ses->entry_cnt--;
		}

		if (!to_journal)
			f2fs_put_page(page, 1);

		f2fs_bug_on(sbi, ses->entry_cnt);
		release_sit_entry_set(ses);
	}

	f2fs_bug_on(sbi, !list_empty(head));
	f2fs_bug_on(sbi, sit_i->dirty_sentries);
out:
	if (cpc->reason == CP_DISCARD) {
		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
			add_discard_addrs(sbi, cpc);
	}
	mutex_unlock(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);

	set_prefree_as_free_segments(sbi);
}
static int build_sit_info(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct sit_info *sit_i;
	unsigned int sit_segs, start;
	char *src_bitmap, *dst_bitmap;
	unsigned int bitmap_size;

	/* allocate memory for SIT information */
	sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
	if (!sit_i)
		return -ENOMEM;

	SM_I(sbi)->sit_info = sit_i;

	sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) *
					sizeof(struct seg_entry), GFP_KERNEL);
	if (!sit_i->sentries)
		return -ENOMEM;

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
	sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
	if (!sit_i->dirty_sentries_bitmap)
		return -ENOMEM;

	for (start = 0; start < MAIN_SEGS(sbi); start++) {
		sit_i->sentries[start].cur_valid_map
			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		sit_i->sentries[start].ckpt_valid_map
			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		sit_i->sentries[start].discard_map
			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		if (!sit_i->sentries[start].cur_valid_map ||
				!sit_i->sentries[start].ckpt_valid_map ||
				!sit_i->sentries[start].discard_map)
			return -ENOMEM;
	}

	sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
	if (!sit_i->tmp_map)
		return -ENOMEM;

	if (sbi->segs_per_sec > 1) {
		sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) *
					sizeof(struct sec_entry), GFP_KERNEL);
		if (!sit_i->sec_entries)
			return -ENOMEM;
	}

	/* get information related to the SIT */
	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;

	/* setup SIT bitmap from the checkpoint pack */
	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);

	dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
	if (!dst_bitmap)
		return -ENOMEM;

	/* init SIT information */
	sit_i->s_ops = &default_salloc_ops;

	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
	sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
	sit_i->sit_bitmap = dst_bitmap;
	sit_i->bitmap_size = bitmap_size;
	sit_i->dirty_sentries = 0;
	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
	sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
	mutex_init(&sit_i->sentry_lock);
	return 0;
}
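/*
 * Rough sizing example for the allocations above (illustrative figures,
 * assuming the common 4KB-block / 2MB-segment geometry, i.e. 512 blocks
 * per segment and SIT_VBLOCK_MAP_SIZE = 512 / 8 = 64 bytes): a 128GB
 * volume has about 64K main segments, so build_sit_info() allocates
 * ~64K seg_entry structs plus three 64-byte validity maps per segment
 * (cur/ckpt/discard), about 12MB of bitmap memory alone.  That is why
 * the large per-volume arrays go through f2fs_kvzalloc(), which may
 * fall back to vmalloc(), while the small fixed-size maps use plain
 * kzalloc().
 */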
static int build_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i;
	unsigned int bitmap_size, sec_bitmap_size;

	/* allocate memory for free segmap information */
	free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
	if (!free_i)
		return -ENOMEM;

	SM_I(sbi)->free_info = free_i;

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
	free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL);
	if (!free_i->free_segmap)
		return -ENOMEM;

	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
	free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL);
	if (!free_i->free_secmap)
		return -ENOMEM;

	/* set all segments as dirty temporarily */
	memset(free_i->free_segmap, 0xff, bitmap_size);
	memset(free_i->free_secmap, 0xff, sec_bitmap_size);

	/* init free segmap information */
	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
	free_i->free_segments = 0;
	free_i->free_sections = 0;
	spin_lock_init(&free_i->segmap_lock);
	return 0;
}

static int build_curseg(struct f2fs_sb_info *sbi)
{
	struct curseg_info *array;
	int i;

	array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
	if (!array)
		return -ENOMEM;

	SM_I(sbi)->curseg_array = array;

	for (i = 0; i < NR_CURSEG_TYPE; i++) {
		mutex_init(&array[i].curseg_mutex);
		array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
		if (!array[i].sum_blk)
			return -ENOMEM;
		array[i].segno = NULL_SEGNO;
		array[i].next_blkoff = 0;
	}
	return restore_curseg_summaries(sbi);
}

static void build_sit_entries(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	int sit_blk_cnt = SIT_BLK_CNT(sbi);
	unsigned int i, start, end;
	unsigned int readed, start_blk = 0;
	int nrpages = MAX_BIO_BLOCKS(sbi);

	do {
		readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true);

		start = start_blk * sit_i->sents_per_block;
		end = (start_blk + readed) * sit_i->sents_per_block;

		for (; start < end && start < MAIN_SEGS(sbi); start++) {
			struct seg_entry *se = &sit_i->sentries[start];
			struct f2fs_sit_block *sit_blk;
			struct f2fs_sit_entry sit;
			struct page *page;

			mutex_lock(&curseg->curseg_mutex);
			for (i = 0; i < sits_in_cursum(sum); i++) {
				if (le32_to_cpu(segno_in_journal(sum, i))
								== start) {
					sit = sit_in_journal(sum, i);
					mutex_unlock(&curseg->curseg_mutex);
					goto got_it;
				}
			}
			mutex_unlock(&curseg->curseg_mutex);

			page = get_current_sit_page(sbi, start);
			sit_blk = (struct f2fs_sit_block *)page_address(page);
			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
			f2fs_put_page(page, 1);
got_it:
			check_block_count(sbi, start, &sit);
			seg_info_from_raw_sit(se, &sit);

			/* build discard map only one time */
			memcpy(se->discard_map, se->cur_valid_map,
							SIT_VBLOCK_MAP_SIZE);
			sbi->discard_blks +=
				sbi->blocks_per_seg - se->valid_blocks;

			if (sbi->segs_per_sec > 1) {
				struct sec_entry *e = get_sec_entry(sbi, start);
				e->valid_blocks += se->valid_blocks;
			}
		}
		start_blk += readed;
	} while (start_blk < sit_blk_cnt);
}

static void init_free_segmap(struct f2fs_sb_info *sbi)
{
	unsigned int start;
	int type;

	for (start = 0; start < MAIN_SEGS(sbi); start++) {
		struct seg_entry *sentry = get_seg_entry(sbi, start);
		if (!sentry->valid_blocks)
			__set_free(sbi, start);
	}

	/* mark the current segments as in use */
	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
		__set_test_and_inuse(sbi, curseg_t->segno);
	}
}
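/*
 * Note on bitmap polarity (illustrative): a set bit in free_segmap
 * means "not free".  build_free_segmap() pre-fills both maps with
 * 0xff, init_free_segmap() then clears the bit of every segment whose
 * SIT entry shows zero valid blocks, and finally re-marks the six
 * current segments as in use.  Example with eight segments (bit 0
 * leftmost) where only #2 and #5 are empty and #0 is a current segment
 * holding valid blocks:
 *
 *	after memset:		1111 1111
 *	after __set_free:	1101 1011	(bits 2 and 5 cleared)
 *	after in-use pass:	1101 1011	(bit 0 was already set)
 */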
static void init_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno = 0, offset = 0;
	unsigned short valid_blocks;

	while (1) {
		/* find dirty segment based on free segmap */
		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
		if (segno >= MAIN_SEGS(sbi))
			break;
		offset = segno + 1;
		valid_blocks = get_valid_blocks(sbi, segno, 0);
		if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
			continue;
		if (valid_blocks > sbi->blocks_per_seg) {
			f2fs_bug_on(sbi, 1);
			continue;
		}
		mutex_lock(&dirty_i->seglist_lock);
		__locate_dirty_segment(sbi, segno, DIRTY);
		mutex_unlock(&dirty_i->seglist_lock);
	}
}

static int init_victim_secmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));

	dirty_i->victim_secmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
	if (!dirty_i->victim_secmap)
		return -ENOMEM;
	return 0;
}

static int build_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i;
	unsigned int bitmap_size, i;

	/* allocate memory for dirty segments list information */
	dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
	if (!dirty_i)
		return -ENOMEM;

	SM_I(sbi)->dirty_info = dirty_i;
	mutex_init(&dirty_i->seglist_lock);

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));

	for (i = 0; i < NR_DIRTY_TYPE; i++) {
		dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size,
								GFP_KERNEL);
		if (!dirty_i->dirty_segmap[i])
			return -ENOMEM;
	}

	init_dirty_segmap(sbi);
	return init_victim_secmap(sbi);
}

/*
 * Update min, max modified time for cost-benefit GC algorithm
 */
static void init_min_max_mtime(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno;

	mutex_lock(&sit_i->sentry_lock);

	sit_i->min_mtime = LLONG_MAX;

	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
		unsigned int i;
		unsigned long long mtime = 0;

		for (i = 0; i < sbi->segs_per_sec; i++)
			mtime += get_seg_entry(sbi, segno + i)->mtime;

		mtime = div_u64(mtime, sbi->segs_per_sec);

		if (sit_i->min_mtime > mtime)
			sit_i->min_mtime = mtime;
	}
	sit_i->max_mtime = get_mtime(sbi);
	mutex_unlock(&sit_i->sentry_lock);
}
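/*
 * How min_mtime/max_mtime are consumed (a sketch of the cost-benefit
 * victim policy; the exact expression lives in gc.c, not here): the GC
 * normalizes a section's age into [0, 100] roughly as
 *
 *	age = 100 - (100 * (mtime - min_mtime)) / (max_mtime - min_mtime)
 *
 * and weighs it against utilization so that old, mostly-invalid
 * sections are reclaimed first.  Initializing the min/max here at
 * mount time gives that normalization a sane range from the start.
 */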
int build_segment_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_sm_info *sm_info;
	int err;

	sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
	if (!sm_info)
		return -ENOMEM;

	/* init sm info */
	sbi->sm_info = sm_info;
	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
	sm_info->rec_prefree_segments = sm_info->main_segments *
					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
	sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;

	INIT_LIST_HEAD(&sm_info->discard_list);
	sm_info->nr_discards = 0;
	sm_info->max_discards = 0;

	sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;

	INIT_LIST_HEAD(&sm_info->sit_entry_set);

	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
		err = create_flush_cmd_control(sbi);
		if (err)
			return err;
	}

	err = build_sit_info(sbi);
	if (err)
		return err;
	err = build_free_segmap(sbi);
	if (err)
		return err;
	err = build_curseg(sbi);
	if (err)
		return err;

	/* reinit free segmap based on SIT */
	build_sit_entries(sbi);

	init_free_segmap(sbi);
	err = build_dirty_segmap(sbi);
	if (err)
		return err;

	init_min_max_mtime(sbi);
	return 0;
}

static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
					enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	mutex_lock(&dirty_i->seglist_lock);
	kvfree(dirty_i->dirty_segmap[dirty_type]);
	dirty_i->nr_dirty[dirty_type] = 0;
	mutex_unlock(&dirty_i->seglist_lock);
}

static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	kvfree(dirty_i->victim_secmap);
}

static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	int i;

	if (!dirty_i)
		return;

	/* discard pre-free/dirty segments list */
	for (i = 0; i < NR_DIRTY_TYPE; i++)
		discard_dirty_segmap(sbi, i);

	destroy_victim_secmap(sbi);
	SM_I(sbi)->dirty_info = NULL;
	kfree(dirty_i);
}

static void destroy_curseg(struct f2fs_sb_info *sbi)
{
	struct curseg_info *array = SM_I(sbi)->curseg_array;
	int i;

	if (!array)
		return;
	SM_I(sbi)->curseg_array = NULL;
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		kfree(array[i].sum_blk);
	kfree(array);
}

static void destroy_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
	if (!free_i)
		return;
	SM_I(sbi)->free_info = NULL;
	kvfree(free_i->free_segmap);
	kvfree(free_i->free_secmap);
	kfree(free_i);
}

static void destroy_sit_info(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int start;

	if (!sit_i)
		return;

	if (sit_i->sentries) {
		for (start = 0; start < MAIN_SEGS(sbi); start++) {
			kfree(sit_i->sentries[start].cur_valid_map);
			kfree(sit_i->sentries[start].ckpt_valid_map);
			kfree(sit_i->sentries[start].discard_map);
		}
	}
	kfree(sit_i->tmp_map);

	kvfree(sit_i->sentries);
	kvfree(sit_i->sec_entries);
	kvfree(sit_i->dirty_sentries_bitmap);

	SM_I(sbi)->sit_info = NULL;
	kfree(sit_i->sit_bitmap);
	kfree(sit_i);
}

void destroy_segment_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);

	if (!sm_info)
		return;
	destroy_flush_cmd_control(sbi);
	destroy_dirty_segmap(sbi);
	destroy_curseg(sbi);
	destroy_free_segmap(sbi);
	destroy_sit_info(sbi);
	sbi->sm_info = NULL;
	kfree(sm_info);
}
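/*
 * Teardown runs in the exact reverse of the build order in
 * build_segment_manager() (sit -> free -> curseg -> dirty becomes
 * dirty -> curseg -> free -> sit), and each destroy_* helper bails out
 * on a NULL pointer, so calling this on a partially built segment
 * manager should be safe.  Hypothetical caller sketch, mirroring a
 * typical fill_super-style error path:
 *
 *	err = build_segment_manager(sbi);
 *	if (err)
 *		goto free_sm;	<- destroy_segment_manager() still safe
 */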
int __init create_segment_manager_caches(void)
{
	discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
			sizeof(struct discard_entry));
	if (!discard_entry_slab)
		goto fail;

	sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
			sizeof(struct sit_entry_set));
	if (!sit_entry_set_slab)
		goto destroy_discard_entry;

	inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
			sizeof(struct inmem_pages));
	if (!inmem_entry_slab)
		goto destroy_sit_entry_set;
	return 0;

destroy_sit_entry_set:
	kmem_cache_destroy(sit_entry_set_slab);
destroy_discard_entry:
	kmem_cache_destroy(discard_entry_slab);
fail:
	return -ENOMEM;
}

void destroy_segment_manager_caches(void)
{
	kmem_cache_destroy(sit_entry_set_slab);
	kmem_cache_destroy(discard_entry_slab);
	kmem_cache_destroy(inmem_entry_slab);
}
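/*
 * Cache lifecycle sketch (illustrative; the actual registration lives
 * in super.c): module init and exit are expected to pair these as
 *
 *	init_f2fs_fs()
 *		create_segment_manager_caches();	<- may fail with -ENOMEM
 *	exit_f2fs_fs()
 *		destroy_segment_manager_caches();
 *
 * destroy_segment_manager_caches() must only run once all inmem_pages,
 * sit_entry_set and discard_entry objects have been freed, since
 * kmem_cache_destroy() on a cache with live objects is a bug.
 */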