1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * f2fs extent cache support 4 * 5 * Copyright (c) 2015 Motorola Mobility 6 * Copyright (c) 2015 Samsung Electronics 7 * Authors: Jaegeuk Kim <jaegeuk@kernel.org> 8 * Chao Yu <chao2.yu@samsung.com> 9 * 10 * block_age-based extent cache added by: 11 * Copyright (c) 2022 xiaomi Co., Ltd. 12 * http://www.xiaomi.com/ 13 */ 14 15 #include <linux/fs.h> 16 #include <linux/f2fs_fs.h> 17 18 #include "f2fs.h" 19 #include "node.h" 20 #include <trace/events/f2fs.h> 21 22 bool sanity_check_extent_cache(struct inode *inode) 23 { 24 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 25 struct f2fs_inode_info *fi = F2FS_I(inode); 26 struct extent_info *ei; 27 28 if (!fi->extent_tree[EX_READ]) 29 return true; 30 31 ei = &fi->extent_tree[EX_READ]->largest; 32 33 if (ei->len && 34 (!f2fs_is_valid_blkaddr(sbi, ei->blk, 35 DATA_GENERIC_ENHANCE) || 36 !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, 37 DATA_GENERIC_ENHANCE))) { 38 set_sbi_flag(sbi, SBI_NEED_FSCK); 39 f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix", 40 __func__, inode->i_ino, 41 ei->blk, ei->fofs, ei->len); 42 return false; 43 } 44 return true; 45 } 46 47 static void __set_extent_info(struct extent_info *ei, 48 unsigned int fofs, unsigned int len, 49 block_t blk, bool keep_clen, 50 unsigned long age, unsigned long last_blocks, 51 enum extent_type type) 52 { 53 ei->fofs = fofs; 54 ei->len = len; 55 56 if (type == EX_READ) { 57 ei->blk = blk; 58 if (keep_clen) 59 return; 60 #ifdef CONFIG_F2FS_FS_COMPRESSION 61 ei->c_len = 0; 62 #endif 63 } else if (type == EX_BLOCK_AGE) { 64 ei->age = age; 65 ei->last_blocks = last_blocks; 66 } 67 } 68 69 static bool __may_read_extent_tree(struct inode *inode) 70 { 71 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 72 73 if (!test_opt(sbi, READ_EXTENT_CACHE)) 74 return false; 75 if (is_inode_flag_set(inode, FI_NO_EXTENT)) 76 return false; 77 if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && 78 !f2fs_sb_has_readonly(sbi)) 79 return false; 80 return S_ISREG(inode->i_mode); 81 } 82 83 static bool __may_age_extent_tree(struct inode *inode) 84 { 85 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 86 87 if (!test_opt(sbi, AGE_EXTENT_CACHE)) 88 return false; 89 /* don't cache block age info for cold file */ 90 if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) 91 return false; 92 if (file_is_cold(inode)) 93 return false; 94 95 return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); 96 } 97 98 static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) 99 { 100 if (type == EX_READ) 101 return __may_read_extent_tree(inode); 102 else if (type == EX_BLOCK_AGE) 103 return __may_age_extent_tree(inode); 104 return false; 105 } 106 107 static bool __may_extent_tree(struct inode *inode, enum extent_type type) 108 { 109 /* 110 * for recovered files during mount do not create extents 111 * if shrinker is not registered. 112 */ 113 if (list_empty(&F2FS_I_SB(inode)->s_list)) 114 return false; 115 116 return __init_may_extent_tree(inode, type); 117 } 118 119 static void __try_update_largest_extent(struct extent_tree *et, 120 struct extent_node *en) 121 { 122 if (et->type != EX_READ) 123 return; 124 if (en->ei.len <= et->largest.len) 125 return; 126 127 et->largest = en->ei; 128 et->largest_updated = true; 129 } 130 131 static bool __is_extent_mergeable(struct extent_info *back, 132 struct extent_info *front, enum extent_type type) 133 { 134 if (type == EX_READ) { 135 #ifdef CONFIG_F2FS_FS_COMPRESSION 136 if (back->c_len && back->len != back->c_len) 137 return false; 138 if (front->c_len && front->len != front->c_len) 139 return false; 140 #endif 141 return (back->fofs + back->len == front->fofs && 142 back->blk + back->len == front->blk); 143 } else if (type == EX_BLOCK_AGE) { 144 return (back->fofs + back->len == front->fofs && 145 abs(back->age - front->age) <= SAME_AGE_REGION && 146 abs(back->last_blocks - front->last_blocks) <= 147 SAME_AGE_REGION); 148 } 149 return false; 150 } 151 152 static bool __is_back_mergeable(struct extent_info *cur, 153 struct extent_info *back, enum extent_type type) 154 { 155 return __is_extent_mergeable(back, cur, type); 156 } 157 158 static bool __is_front_mergeable(struct extent_info *cur, 159 struct extent_info *front, enum extent_type type) 160 { 161 return __is_extent_mergeable(cur, front, type); 162 } 163 164 static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re, 165 unsigned int ofs) 166 { 167 if (cached_re) { 168 if (cached_re->ofs <= ofs && 169 cached_re->ofs + cached_re->len > ofs) { 170 return cached_re; 171 } 172 } 173 return NULL; 174 } 175 176 static struct rb_entry *__lookup_rb_tree_slow(struct rb_root_cached *root, 177 unsigned int ofs) 178 { 179 struct rb_node *node = root->rb_root.rb_node; 180 struct rb_entry *re; 181 182 while (node) { 183 re = rb_entry(node, struct rb_entry, rb_node); 184 185 if (ofs < re->ofs) 186 node = node->rb_left; 187 else if (ofs >= re->ofs + re->len) 188 node = node->rb_right; 189 else 190 return re; 191 } 192 return NULL; 193 } 194 195 struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root, 196 struct rb_entry *cached_re, unsigned int ofs) 197 { 198 struct rb_entry *re; 199 200 re = __lookup_rb_tree_fast(cached_re, ofs); 201 if (!re) 202 return __lookup_rb_tree_slow(root, ofs); 203 204 return re; 205 } 206 207 struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi, 208 struct rb_root_cached *root, 209 struct rb_node **parent, 210 unsigned long long key, bool *leftmost) 211 { 212 struct rb_node **p = &root->rb_root.rb_node; 213 struct rb_entry *re; 214 215 while (*p) { 216 *parent = *p; 217 re = rb_entry(*parent, struct rb_entry, rb_node); 218 219 if (key < re->key) { 220 p = &(*p)->rb_left; 221 } else { 222 p = &(*p)->rb_right; 223 *leftmost = false; 224 } 225 } 226 227 return p; 228 } 229 230 struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, 231 struct rb_root_cached *root, 232 struct rb_node **parent, 233 unsigned int ofs, bool *leftmost) 234 { 235 struct rb_node **p = &root->rb_root.rb_node; 236 struct rb_entry *re; 237 238 while (*p) { 239 *parent = *p; 240 re = rb_entry(*parent, struct rb_entry, rb_node); 241 242 if (ofs < re->ofs) { 243 p = &(*p)->rb_left; 244 } else if (ofs >= re->ofs + re->len) { 245 p = &(*p)->rb_right; 246 *leftmost = false; 247 } else { 248 f2fs_bug_on(sbi, 1); 249 } 250 } 251 252 return p; 253 } 254 255 /* 256 * lookup rb entry in position of @ofs in rb-tree, 257 * if hit, return the entry, otherwise, return NULL 258 * @prev_ex: extent before ofs 259 * @next_ex: extent after ofs 260 * @insert_p: insert point for new extent at ofs 261 * in order to simplify the insertion after. 262 * tree must stay unchanged between lookup and insertion. 263 */ 264 struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, 265 struct rb_entry *cached_re, 266 unsigned int ofs, 267 struct rb_entry **prev_entry, 268 struct rb_entry **next_entry, 269 struct rb_node ***insert_p, 270 struct rb_node **insert_parent, 271 bool force, bool *leftmost) 272 { 273 struct rb_node **pnode = &root->rb_root.rb_node; 274 struct rb_node *parent = NULL, *tmp_node; 275 struct rb_entry *re = cached_re; 276 277 *insert_p = NULL; 278 *insert_parent = NULL; 279 *prev_entry = NULL; 280 *next_entry = NULL; 281 282 if (RB_EMPTY_ROOT(&root->rb_root)) 283 return NULL; 284 285 if (re) { 286 if (re->ofs <= ofs && re->ofs + re->len > ofs) 287 goto lookup_neighbors; 288 } 289 290 if (leftmost) 291 *leftmost = true; 292 293 while (*pnode) { 294 parent = *pnode; 295 re = rb_entry(*pnode, struct rb_entry, rb_node); 296 297 if (ofs < re->ofs) { 298 pnode = &(*pnode)->rb_left; 299 } else if (ofs >= re->ofs + re->len) { 300 pnode = &(*pnode)->rb_right; 301 if (leftmost) 302 *leftmost = false; 303 } else { 304 goto lookup_neighbors; 305 } 306 } 307 308 *insert_p = pnode; 309 *insert_parent = parent; 310 311 re = rb_entry(parent, struct rb_entry, rb_node); 312 tmp_node = parent; 313 if (parent && ofs > re->ofs) 314 tmp_node = rb_next(parent); 315 *next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); 316 317 tmp_node = parent; 318 if (parent && ofs < re->ofs) 319 tmp_node = rb_prev(parent); 320 *prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); 321 return NULL; 322 323 lookup_neighbors: 324 if (ofs == re->ofs || force) { 325 /* lookup prev node for merging backward later */ 326 tmp_node = rb_prev(&re->rb_node); 327 *prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); 328 } 329 if (ofs == re->ofs + re->len - 1 || force) { 330 /* lookup next node for merging frontward later */ 331 tmp_node = rb_next(&re->rb_node); 332 *next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); 333 } 334 return re; 335 } 336 337 bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, 338 struct rb_root_cached *root, bool check_key) 339 { 340 #ifdef CONFIG_F2FS_CHECK_FS 341 struct rb_node *cur = rb_first_cached(root), *next; 342 struct rb_entry *cur_re, *next_re; 343 344 if (!cur) 345 return true; 346 347 while (cur) { 348 next = rb_next(cur); 349 if (!next) 350 return true; 351 352 cur_re = rb_entry(cur, struct rb_entry, rb_node); 353 next_re = rb_entry(next, struct rb_entry, rb_node); 354 355 if (check_key) { 356 if (cur_re->key > next_re->key) { 357 f2fs_info(sbi, "inconsistent rbtree, " 358 "cur(%llu) next(%llu)", 359 cur_re->key, next_re->key); 360 return false; 361 } 362 goto next; 363 } 364 365 if (cur_re->ofs + cur_re->len > next_re->ofs) { 366 f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)", 367 cur_re->ofs, cur_re->len, 368 next_re->ofs, next_re->len); 369 return false; 370 } 371 next: 372 cur = next; 373 } 374 #endif 375 return true; 376 } 377 378 static struct kmem_cache *extent_tree_slab; 379 static struct kmem_cache *extent_node_slab; 380 381 static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, 382 struct extent_tree *et, struct extent_info *ei, 383 struct rb_node *parent, struct rb_node **p, 384 bool leftmost) 385 { 386 struct extent_tree_info *eti = &sbi->extent_tree[et->type]; 387 struct extent_node *en; 388 389 en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi); 390 if (!en) 391 return NULL; 392 393 en->ei = *ei; 394 INIT_LIST_HEAD(&en->list); 395 en->et = et; 396 397 rb_link_node(&en->rb_node, parent, p); 398 rb_insert_color_cached(&en->rb_node, &et->root, leftmost); 399 atomic_inc(&et->node_cnt); 400 atomic_inc(&eti->total_ext_node); 401 return en; 402 } 403 404 static void __detach_extent_node(struct f2fs_sb_info *sbi, 405 struct extent_tree *et, struct extent_node *en) 406 { 407 struct extent_tree_info *eti = &sbi->extent_tree[et->type]; 408 409 rb_erase_cached(&en->rb_node, &et->root); 410 atomic_dec(&et->node_cnt); 411 atomic_dec(&eti->total_ext_node); 412 413 if (et->cached_en == en) 414 et->cached_en = NULL; 415 kmem_cache_free(extent_node_slab, en); 416 } 417 418 /* 419 * Flow to release an extent_node: 420 * 1. list_del_init 421 * 2. __detach_extent_node 422 * 3. kmem_cache_free. 423 */ 424 static void __release_extent_node(struct f2fs_sb_info *sbi, 425 struct extent_tree *et, struct extent_node *en) 426 { 427 struct extent_tree_info *eti = &sbi->extent_tree[et->type]; 428 429 spin_lock(&eti->extent_lock); 430 f2fs_bug_on(sbi, list_empty(&en->list)); 431 list_del_init(&en->list); 432 spin_unlock(&eti->extent_lock); 433 434 __detach_extent_node(sbi, et, en); 435 } 436 437 static struct extent_tree *__grab_extent_tree(struct inode *inode, 438 enum extent_type type) 439 { 440 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 441 struct extent_tree_info *eti = &sbi->extent_tree[type]; 442 struct extent_tree *et; 443 nid_t ino = inode->i_ino; 444 445 mutex_lock(&eti->extent_tree_lock); 446 et = radix_tree_lookup(&eti->extent_tree_root, ino); 447 if (!et) { 448 et = f2fs_kmem_cache_alloc(extent_tree_slab, 449 GFP_NOFS, true, NULL); 450 f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et); 451 memset(et, 0, sizeof(struct extent_tree)); 452 et->ino = ino; 453 et->type = type; 454 et->root = RB_ROOT_CACHED; 455 et->cached_en = NULL; 456 rwlock_init(&et->lock); 457 INIT_LIST_HEAD(&et->list); 458 atomic_set(&et->node_cnt, 0); 459 atomic_inc(&eti->total_ext_tree); 460 } else { 461 atomic_dec(&eti->total_zombie_tree); 462 list_del_init(&et->list); 463 } 464 mutex_unlock(&eti->extent_tree_lock); 465 466 /* never died until evict_inode */ 467 F2FS_I(inode)->extent_tree[type] = et; 468 469 return et; 470 } 471 472 static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, 473 struct extent_tree *et) 474 { 475 struct rb_node *node, *next; 476 struct extent_node *en; 477 unsigned int count = atomic_read(&et->node_cnt); 478 479 node = rb_first_cached(&et->root); 480 while (node) { 481 next = rb_next(node); 482 en = rb_entry(node, struct extent_node, rb_node); 483 __release_extent_node(sbi, et, en); 484 node = next; 485 } 486 487 return count - atomic_read(&et->node_cnt); 488 } 489 490 static void __drop_largest_extent(struct extent_tree *et, 491 pgoff_t fofs, unsigned int len) 492 { 493 if (fofs < et->largest.fofs + et->largest.len && 494 fofs + len > et->largest.fofs) { 495 et->largest.len = 0; 496 et->largest_updated = true; 497 } 498 } 499 500 void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) 501 { 502 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 503 struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; 504 struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; 505 struct extent_tree *et; 506 struct extent_node *en; 507 struct extent_info ei; 508 509 if (!__may_extent_tree(inode, EX_READ)) { 510 /* drop largest read extent */ 511 if (i_ext && i_ext->len) { 512 f2fs_wait_on_page_writeback(ipage, NODE, true, true); 513 i_ext->len = 0; 514 set_page_dirty(ipage); 515 } 516 goto out; 517 } 518 519 et = __grab_extent_tree(inode, EX_READ); 520 521 if (!i_ext || !i_ext->len) 522 goto out; 523 524 get_read_extent_info(&ei, i_ext); 525 526 write_lock(&et->lock); 527 if (atomic_read(&et->node_cnt)) 528 goto unlock_out; 529 530 en = __attach_extent_node(sbi, et, &ei, NULL, 531 &et->root.rb_root.rb_node, true); 532 if (en) { 533 et->largest = en->ei; 534 et->cached_en = en; 535 536 spin_lock(&eti->extent_lock); 537 list_add_tail(&en->list, &eti->extent_list); 538 spin_unlock(&eti->extent_lock); 539 } 540 unlock_out: 541 write_unlock(&et->lock); 542 out: 543 if (!F2FS_I(inode)->extent_tree[EX_READ]) 544 set_inode_flag(inode, FI_NO_EXTENT); 545 } 546 547 void f2fs_init_age_extent_tree(struct inode *inode) 548 { 549 if (!__init_may_extent_tree(inode, EX_BLOCK_AGE)) 550 return; 551 __grab_extent_tree(inode, EX_BLOCK_AGE); 552 } 553 554 void f2fs_init_extent_tree(struct inode *inode) 555 { 556 /* initialize read cache */ 557 if (__init_may_extent_tree(inode, EX_READ)) 558 __grab_extent_tree(inode, EX_READ); 559 560 /* initialize block age cache */ 561 if (__init_may_extent_tree(inode, EX_BLOCK_AGE)) 562 __grab_extent_tree(inode, EX_BLOCK_AGE); 563 } 564 565 static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, 566 struct extent_info *ei, enum extent_type type) 567 { 568 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 569 struct extent_tree_info *eti = &sbi->extent_tree[type]; 570 struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; 571 struct extent_node *en; 572 bool ret = false; 573 574 if (!et) 575 return false; 576 577 trace_f2fs_lookup_extent_tree_start(inode, pgofs, type); 578 579 read_lock(&et->lock); 580 581 if (type == EX_READ && 582 et->largest.fofs <= pgofs && 583 et->largest.fofs + et->largest.len > pgofs) { 584 *ei = et->largest; 585 ret = true; 586 stat_inc_largest_node_hit(sbi); 587 goto out; 588 } 589 590 en = (struct extent_node *)f2fs_lookup_rb_tree(&et->root, 591 (struct rb_entry *)et->cached_en, pgofs); 592 if (!en) 593 goto out; 594 595 if (en == et->cached_en) 596 stat_inc_cached_node_hit(sbi, type); 597 else 598 stat_inc_rbtree_node_hit(sbi, type); 599 600 *ei = en->ei; 601 spin_lock(&eti->extent_lock); 602 if (!list_empty(&en->list)) { 603 list_move_tail(&en->list, &eti->extent_list); 604 et->cached_en = en; 605 } 606 spin_unlock(&eti->extent_lock); 607 ret = true; 608 out: 609 stat_inc_total_hit(sbi, type); 610 read_unlock(&et->lock); 611 612 if (type == EX_READ) 613 trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei); 614 else if (type == EX_BLOCK_AGE) 615 trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei); 616 return ret; 617 } 618 619 static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, 620 struct extent_tree *et, struct extent_info *ei, 621 struct extent_node *prev_ex, 622 struct extent_node *next_ex) 623 { 624 struct extent_tree_info *eti = &sbi->extent_tree[et->type]; 625 struct extent_node *en = NULL; 626 627 if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) { 628 prev_ex->ei.len += ei->len; 629 ei = &prev_ex->ei; 630 en = prev_ex; 631 } 632 633 if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) { 634 next_ex->ei.fofs = ei->fofs; 635 next_ex->ei.len += ei->len; 636 if (et->type == EX_READ) 637 next_ex->ei.blk = ei->blk; 638 if (en) 639 __release_extent_node(sbi, et, prev_ex); 640 641 en = next_ex; 642 } 643 644 if (!en) 645 return NULL; 646 647 __try_update_largest_extent(et, en); 648 649 spin_lock(&eti->extent_lock); 650 if (!list_empty(&en->list)) { 651 list_move_tail(&en->list, &eti->extent_list); 652 et->cached_en = en; 653 } 654 spin_unlock(&eti->extent_lock); 655 return en; 656 } 657 658 static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, 659 struct extent_tree *et, struct extent_info *ei, 660 struct rb_node **insert_p, 661 struct rb_node *insert_parent, 662 bool leftmost) 663 { 664 struct extent_tree_info *eti = &sbi->extent_tree[et->type]; 665 struct rb_node **p; 666 struct rb_node *parent = NULL; 667 struct extent_node *en = NULL; 668 669 if (insert_p && insert_parent) { 670 parent = insert_parent; 671 p = insert_p; 672 goto do_insert; 673 } 674 675 leftmost = true; 676 677 p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent, 678 ei->fofs, &leftmost); 679 do_insert: 680 en = __attach_extent_node(sbi, et, ei, parent, p, leftmost); 681 if (!en) 682 return NULL; 683 684 __try_update_largest_extent(et, en); 685 686 /* update in global extent list */ 687 spin_lock(&eti->extent_lock); 688 list_add_tail(&en->list, &eti->extent_list); 689 et->cached_en = en; 690 spin_unlock(&eti->extent_lock); 691 return en; 692 } 693 694 static void __update_extent_tree_range(struct inode *inode, 695 struct extent_info *tei, enum extent_type type) 696 { 697 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 698 struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; 699 struct extent_node *en = NULL, *en1 = NULL; 700 struct extent_node *prev_en = NULL, *next_en = NULL; 701 struct extent_info ei, dei, prev; 702 struct rb_node **insert_p = NULL, *insert_parent = NULL; 703 unsigned int fofs = tei->fofs, len = tei->len; 704 unsigned int end = fofs + len; 705 bool updated = false; 706 bool leftmost = false; 707 708 if (!et) 709 return; 710 711 if (type == EX_READ) 712 trace_f2fs_update_read_extent_tree_range(inode, fofs, len, 713 tei->blk, 0); 714 else if (type == EX_BLOCK_AGE) 715 trace_f2fs_update_age_extent_tree_range(inode, fofs, len, 716 tei->age, tei->last_blocks); 717 718 write_lock(&et->lock); 719 720 if (type == EX_READ) { 721 if (is_inode_flag_set(inode, FI_NO_EXTENT)) { 722 write_unlock(&et->lock); 723 return; 724 } 725 726 prev = et->largest; 727 dei.len = 0; 728 729 /* 730 * drop largest extent before lookup, in case it's already 731 * been shrunk from extent tree 732 */ 733 __drop_largest_extent(et, fofs, len); 734 } 735 736 /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ 737 en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root, 738 (struct rb_entry *)et->cached_en, fofs, 739 (struct rb_entry **)&prev_en, 740 (struct rb_entry **)&next_en, 741 &insert_p, &insert_parent, false, 742 &leftmost); 743 if (!en) 744 en = next_en; 745 746 /* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */ 747 while (en && en->ei.fofs < end) { 748 unsigned int org_end; 749 int parts = 0; /* # of parts current extent split into */ 750 751 next_en = en1 = NULL; 752 753 dei = en->ei; 754 org_end = dei.fofs + dei.len; 755 f2fs_bug_on(sbi, fofs >= org_end); 756 757 if (fofs > dei.fofs && (type != EX_READ || 758 fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) { 759 en->ei.len = fofs - en->ei.fofs; 760 prev_en = en; 761 parts = 1; 762 } 763 764 if (end < org_end && (type != EX_READ || 765 org_end - end >= F2FS_MIN_EXTENT_LEN)) { 766 if (parts) { 767 __set_extent_info(&ei, 768 end, org_end - end, 769 end - dei.fofs + dei.blk, false, 770 dei.age, dei.last_blocks, 771 type); 772 en1 = __insert_extent_tree(sbi, et, &ei, 773 NULL, NULL, true); 774 next_en = en1; 775 } else { 776 __set_extent_info(&en->ei, 777 end, en->ei.len - (end - dei.fofs), 778 en->ei.blk + (end - dei.fofs), true, 779 dei.age, dei.last_blocks, 780 type); 781 next_en = en; 782 } 783 parts++; 784 } 785 786 if (!next_en) { 787 struct rb_node *node = rb_next(&en->rb_node); 788 789 next_en = rb_entry_safe(node, struct extent_node, 790 rb_node); 791 } 792 793 if (parts) 794 __try_update_largest_extent(et, en); 795 else 796 __release_extent_node(sbi, et, en); 797 798 /* 799 * if original extent is split into zero or two parts, extent 800 * tree has been altered by deletion or insertion, therefore 801 * invalidate pointers regard to tree. 802 */ 803 if (parts != 1) { 804 insert_p = NULL; 805 insert_parent = NULL; 806 } 807 en = next_en; 808 } 809 810 if (type == EX_BLOCK_AGE) 811 goto update_age_extent_cache; 812 813 /* 3. update extent in read extent cache */ 814 BUG_ON(type != EX_READ); 815 816 if (tei->blk) { 817 __set_extent_info(&ei, fofs, len, tei->blk, false, 818 0, 0, EX_READ); 819 if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) 820 __insert_extent_tree(sbi, et, &ei, 821 insert_p, insert_parent, leftmost); 822 823 /* give up extent_cache, if split and small updates happen */ 824 if (dei.len >= 1 && 825 prev.len < F2FS_MIN_EXTENT_LEN && 826 et->largest.len < F2FS_MIN_EXTENT_LEN) { 827 et->largest.len = 0; 828 et->largest_updated = true; 829 set_inode_flag(inode, FI_NO_EXTENT); 830 } 831 } 832 833 if (is_inode_flag_set(inode, FI_NO_EXTENT)) 834 __free_extent_tree(sbi, et); 835 836 if (et->largest_updated) { 837 et->largest_updated = false; 838 updated = true; 839 } 840 goto out_read_extent_cache; 841 update_age_extent_cache: 842 if (!tei->last_blocks) 843 goto out_read_extent_cache; 844 845 __set_extent_info(&ei, fofs, len, 0, false, 846 tei->age, tei->last_blocks, EX_BLOCK_AGE); 847 if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) 848 __insert_extent_tree(sbi, et, &ei, 849 insert_p, insert_parent, leftmost); 850 out_read_extent_cache: 851 write_unlock(&et->lock); 852 853 if (updated) 854 f2fs_mark_inode_dirty_sync(inode, true); 855 } 856 857 #ifdef CONFIG_F2FS_FS_COMPRESSION 858 void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, 859 pgoff_t fofs, block_t blkaddr, unsigned int llen, 860 unsigned int c_len) 861 { 862 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 863 struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; 864 struct extent_node *en = NULL; 865 struct extent_node *prev_en = NULL, *next_en = NULL; 866 struct extent_info ei; 867 struct rb_node **insert_p = NULL, *insert_parent = NULL; 868 bool leftmost = false; 869 870 trace_f2fs_update_read_extent_tree_range(inode, fofs, llen, 871 blkaddr, c_len); 872 873 /* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */ 874 if (is_inode_flag_set(inode, FI_NO_EXTENT)) 875 return; 876 877 write_lock(&et->lock); 878 879 en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root, 880 (struct rb_entry *)et->cached_en, fofs, 881 (struct rb_entry **)&prev_en, 882 (struct rb_entry **)&next_en, 883 &insert_p, &insert_parent, false, 884 &leftmost); 885 if (en) 886 goto unlock_out; 887 888 __set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ); 889 ei.c_len = c_len; 890 891 if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) 892 __insert_extent_tree(sbi, et, &ei, 893 insert_p, insert_parent, leftmost); 894 unlock_out: 895 write_unlock(&et->lock); 896 } 897 #endif 898 899 static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi, 900 unsigned long long new, 901 unsigned long long old) 902 { 903 unsigned int rem_old, rem_new; 904 unsigned long long res; 905 unsigned int weight = sbi->last_age_weight; 906 907 res = div_u64_rem(new, 100, &rem_new) * (100 - weight) 908 + div_u64_rem(old, 100, &rem_old) * weight; 909 910 if (rem_new) 911 res += rem_new * (100 - weight) / 100; 912 if (rem_old) 913 res += rem_old * weight / 100; 914 915 return res; 916 } 917 918 /* This returns a new age and allocated blocks in ei */ 919 static int __get_new_block_age(struct inode *inode, struct extent_info *ei, 920 block_t blkaddr) 921 { 922 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 923 loff_t f_size = i_size_read(inode); 924 unsigned long long cur_blocks = 925 atomic64_read(&sbi->allocated_data_blocks); 926 struct extent_info tei = *ei; /* only fofs and len are valid */ 927 928 /* 929 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last 930 * file block even in seq write. So don't record age for newly last file 931 * block here. 932 */ 933 if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) && 934 blkaddr == NEW_ADDR) 935 return -EINVAL; 936 937 if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) { 938 unsigned long long cur_age; 939 940 if (cur_blocks >= tei.last_blocks) 941 cur_age = cur_blocks - tei.last_blocks; 942 else 943 /* allocated_data_blocks overflow */ 944 cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks; 945 946 if (tei.age) 947 ei->age = __calculate_block_age(sbi, cur_age, tei.age); 948 else 949 ei->age = cur_age; 950 ei->last_blocks = cur_blocks; 951 WARN_ON(ei->age > cur_blocks); 952 return 0; 953 } 954 955 f2fs_bug_on(sbi, blkaddr == NULL_ADDR); 956 957 /* the data block was allocated for the first time */ 958 if (blkaddr == NEW_ADDR) 959 goto out; 960 961 if (__is_valid_data_blkaddr(blkaddr) && 962 !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { 963 f2fs_bug_on(sbi, 1); 964 return -EINVAL; 965 } 966 out: 967 /* 968 * init block age with zero, this can happen when the block age extent 969 * was reclaimed due to memory constraint or system reboot 970 */ 971 ei->age = 0; 972 ei->last_blocks = cur_blocks; 973 return 0; 974 } 975 976 static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type) 977 { 978 struct extent_info ei = {}; 979 980 if (!__may_extent_tree(dn->inode, type)) 981 return; 982 983 ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + 984 dn->ofs_in_node; 985 ei.len = 1; 986 987 if (type == EX_READ) { 988 if (dn->data_blkaddr == NEW_ADDR) 989 ei.blk = NULL_ADDR; 990 else 991 ei.blk = dn->data_blkaddr; 992 } else if (type == EX_BLOCK_AGE) { 993 if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr)) 994 return; 995 } 996 __update_extent_tree_range(dn->inode, &ei, type); 997 } 998 999 static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink, 1000 enum extent_type type) 1001 { 1002 struct extent_tree_info *eti = &sbi->extent_tree[type]; 1003 struct extent_tree *et, *next; 1004 struct extent_node *en; 1005 unsigned int node_cnt = 0, tree_cnt = 0; 1006 int remained; 1007 1008 if (!atomic_read(&eti->total_zombie_tree)) 1009 goto free_node; 1010 1011 if (!mutex_trylock(&eti->extent_tree_lock)) 1012 goto out; 1013 1014 /* 1. remove unreferenced extent tree */ 1015 list_for_each_entry_safe(et, next, &eti->zombie_list, list) { 1016 if (atomic_read(&et->node_cnt)) { 1017 write_lock(&et->lock); 1018 node_cnt += __free_extent_tree(sbi, et); 1019 write_unlock(&et->lock); 1020 } 1021 f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); 1022 list_del_init(&et->list); 1023 radix_tree_delete(&eti->extent_tree_root, et->ino); 1024 kmem_cache_free(extent_tree_slab, et); 1025 atomic_dec(&eti->total_ext_tree); 1026 atomic_dec(&eti->total_zombie_tree); 1027 tree_cnt++; 1028 1029 if (node_cnt + tree_cnt >= nr_shrink) 1030 goto unlock_out; 1031 cond_resched(); 1032 } 1033 mutex_unlock(&eti->extent_tree_lock); 1034 1035 free_node: 1036 /* 2. remove LRU extent entries */ 1037 if (!mutex_trylock(&eti->extent_tree_lock)) 1038 goto out; 1039 1040 remained = nr_shrink - (node_cnt + tree_cnt); 1041 1042 spin_lock(&eti->extent_lock); 1043 for (; remained > 0; remained--) { 1044 if (list_empty(&eti->extent_list)) 1045 break; 1046 en = list_first_entry(&eti->extent_list, 1047 struct extent_node, list); 1048 et = en->et; 1049 if (!write_trylock(&et->lock)) { 1050 /* refresh this extent node's position in extent list */ 1051 list_move_tail(&en->list, &eti->extent_list); 1052 continue; 1053 } 1054 1055 list_del_init(&en->list); 1056 spin_unlock(&eti->extent_lock); 1057 1058 __detach_extent_node(sbi, et, en); 1059 1060 write_unlock(&et->lock); 1061 node_cnt++; 1062 spin_lock(&eti->extent_lock); 1063 } 1064 spin_unlock(&eti->extent_lock); 1065 1066 unlock_out: 1067 mutex_unlock(&eti->extent_tree_lock); 1068 out: 1069 trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type); 1070 1071 return node_cnt + tree_cnt; 1072 } 1073 1074 /* read extent cache operations */ 1075 bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs, 1076 struct extent_info *ei) 1077 { 1078 if (!__may_extent_tree(inode, EX_READ)) 1079 return false; 1080 1081 return __lookup_extent_tree(inode, pgofs, ei, EX_READ); 1082 } 1083 1084 bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index, 1085 block_t *blkaddr) 1086 { 1087 struct extent_info ei = {}; 1088 1089 if (!f2fs_lookup_read_extent_cache(inode, index, &ei)) 1090 return false; 1091 *blkaddr = ei.blk + index - ei.fofs; 1092 return true; 1093 } 1094 1095 void f2fs_update_read_extent_cache(struct dnode_of_data *dn) 1096 { 1097 return __update_extent_cache(dn, EX_READ); 1098 } 1099 1100 void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn, 1101 pgoff_t fofs, block_t blkaddr, unsigned int len) 1102 { 1103 struct extent_info ei = { 1104 .fofs = fofs, 1105 .len = len, 1106 .blk = blkaddr, 1107 }; 1108 1109 if (!__may_extent_tree(dn->inode, EX_READ)) 1110 return; 1111 1112 __update_extent_tree_range(dn->inode, &ei, EX_READ); 1113 } 1114 1115 unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) 1116 { 1117 if (!test_opt(sbi, READ_EXTENT_CACHE)) 1118 return 0; 1119 1120 return __shrink_extent_tree(sbi, nr_shrink, EX_READ); 1121 } 1122 1123 /* block age extent cache operations */ 1124 bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs, 1125 struct extent_info *ei) 1126 { 1127 if (!__may_extent_tree(inode, EX_BLOCK_AGE)) 1128 return false; 1129 1130 return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE); 1131 } 1132 1133 void f2fs_update_age_extent_cache(struct dnode_of_data *dn) 1134 { 1135 return __update_extent_cache(dn, EX_BLOCK_AGE); 1136 } 1137 1138 void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn, 1139 pgoff_t fofs, unsigned int len) 1140 { 1141 struct extent_info ei = { 1142 .fofs = fofs, 1143 .len = len, 1144 }; 1145 1146 if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE)) 1147 return; 1148 1149 __update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE); 1150 } 1151 1152 unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) 1153 { 1154 if (!test_opt(sbi, AGE_EXTENT_CACHE)) 1155 return 0; 1156 1157 return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE); 1158 } 1159 1160 static unsigned int __destroy_extent_node(struct inode *inode, 1161 enum extent_type type) 1162 { 1163 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1164 struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; 1165 unsigned int node_cnt = 0; 1166 1167 if (!et || !atomic_read(&et->node_cnt)) 1168 return 0; 1169 1170 write_lock(&et->lock); 1171 node_cnt = __free_extent_tree(sbi, et); 1172 write_unlock(&et->lock); 1173 1174 return node_cnt; 1175 } 1176 1177 void f2fs_destroy_extent_node(struct inode *inode) 1178 { 1179 __destroy_extent_node(inode, EX_READ); 1180 __destroy_extent_node(inode, EX_BLOCK_AGE); 1181 } 1182 1183 static void __drop_extent_tree(struct inode *inode, enum extent_type type) 1184 { 1185 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1186 struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; 1187 bool updated = false; 1188 1189 if (!__may_extent_tree(inode, type)) 1190 return; 1191 1192 write_lock(&et->lock); 1193 __free_extent_tree(sbi, et); 1194 if (type == EX_READ) { 1195 set_inode_flag(inode, FI_NO_EXTENT); 1196 if (et->largest.len) { 1197 et->largest.len = 0; 1198 updated = true; 1199 } 1200 } 1201 write_unlock(&et->lock); 1202 if (updated) 1203 f2fs_mark_inode_dirty_sync(inode, true); 1204 } 1205 1206 void f2fs_drop_extent_tree(struct inode *inode) 1207 { 1208 __drop_extent_tree(inode, EX_READ); 1209 __drop_extent_tree(inode, EX_BLOCK_AGE); 1210 } 1211 1212 static void __destroy_extent_tree(struct inode *inode, enum extent_type type) 1213 { 1214 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1215 struct extent_tree_info *eti = &sbi->extent_tree[type]; 1216 struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; 1217 unsigned int node_cnt = 0; 1218 1219 if (!et) 1220 return; 1221 1222 if (inode->i_nlink && !is_bad_inode(inode) && 1223 atomic_read(&et->node_cnt)) { 1224 mutex_lock(&eti->extent_tree_lock); 1225 list_add_tail(&et->list, &eti->zombie_list); 1226 atomic_inc(&eti->total_zombie_tree); 1227 mutex_unlock(&eti->extent_tree_lock); 1228 return; 1229 } 1230 1231 /* free all extent info belong to this extent tree */ 1232 node_cnt = __destroy_extent_node(inode, type); 1233 1234 /* delete extent tree entry in radix tree */ 1235 mutex_lock(&eti->extent_tree_lock); 1236 f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); 1237 radix_tree_delete(&eti->extent_tree_root, inode->i_ino); 1238 kmem_cache_free(extent_tree_slab, et); 1239 atomic_dec(&eti->total_ext_tree); 1240 mutex_unlock(&eti->extent_tree_lock); 1241 1242 F2FS_I(inode)->extent_tree[type] = NULL; 1243 1244 trace_f2fs_destroy_extent_tree(inode, node_cnt, type); 1245 } 1246 1247 void f2fs_destroy_extent_tree(struct inode *inode) 1248 { 1249 __destroy_extent_tree(inode, EX_READ); 1250 __destroy_extent_tree(inode, EX_BLOCK_AGE); 1251 } 1252 1253 static void __init_extent_tree_info(struct extent_tree_info *eti) 1254 { 1255 INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO); 1256 mutex_init(&eti->extent_tree_lock); 1257 INIT_LIST_HEAD(&eti->extent_list); 1258 spin_lock_init(&eti->extent_lock); 1259 atomic_set(&eti->total_ext_tree, 0); 1260 INIT_LIST_HEAD(&eti->zombie_list); 1261 atomic_set(&eti->total_zombie_tree, 0); 1262 atomic_set(&eti->total_ext_node, 0); 1263 } 1264 1265 void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi) 1266 { 1267 __init_extent_tree_info(&sbi->extent_tree[EX_READ]); 1268 __init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]); 1269 1270 /* initialize for block age extents */ 1271 atomic64_set(&sbi->allocated_data_blocks, 0); 1272 sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD; 1273 sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD; 1274 sbi->last_age_weight = LAST_AGE_WEIGHT; 1275 } 1276 1277 int __init f2fs_create_extent_cache(void) 1278 { 1279 extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", 1280 sizeof(struct extent_tree)); 1281 if (!extent_tree_slab) 1282 return -ENOMEM; 1283 extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node", 1284 sizeof(struct extent_node)); 1285 if (!extent_node_slab) { 1286 kmem_cache_destroy(extent_tree_slab); 1287 return -ENOMEM; 1288 } 1289 return 0; 1290 } 1291 1292 void f2fs_destroy_extent_cache(void) 1293 { 1294 kmem_cache_destroy(extent_node_slab); 1295 kmem_cache_destroy(extent_tree_slab); 1296 } 1297