// SPDX-License-Identifier: GPL-2.0
/*
 * f2fs extent cache support
 *
 * Copyright (c) 2015 Motorola Mobility
 * Copyright (c) 2015 Samsung Electronics
 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
 *          Chao Yu <chao2.yu@samsung.com>
 *
 * block_age-based extent cache added by:
 * Copyright (c) 2022 xiaomi Co., Ltd.
 *             http://www.xiaomi.com/
 */

#include <linux/fs.h>
#include <linux/f2fs_fs.h>

#include "f2fs.h"
#include "node.h"
#include <trace/events/f2fs.h>

bool sanity_check_extent_cache(struct inode *inode, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
	struct extent_info ei;

	get_read_extent_info(&ei, i_ext);

	if (!ei.len)
		return true;

	if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) ||
	    !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1,
					DATA_GENERIC_ENHANCE)) {
		f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
			  __func__, inode->i_ino,
			  ei.blk, ei.fofs, ei.len);
		return false;
	}
	return true;
}

static void __set_extent_info(struct extent_info *ei,
				unsigned int fofs, unsigned int len,
				block_t blk, bool keep_clen,
				unsigned long age, unsigned long last_blocks,
				enum extent_type type)
{
	ei->fofs = fofs;
	ei->len = len;

	if (type == EX_READ) {
		ei->blk = blk;
		if (keep_clen)
			return;
#ifdef CONFIG_F2FS_FS_COMPRESSION
		ei->c_len = 0;
#endif
	} else if (type == EX_BLOCK_AGE) {
		ei->age = age;
		ei->last_blocks = last_blocks;
	}
}

static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
{
	if (type == EX_READ)
		return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
			S_ISREG(inode->i_mode);
	if (type == EX_BLOCK_AGE)
		return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
			(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
	return false;
}

static bool __may_extent_tree(struct inode *inode, enum extent_type type)
{
	/*
	 * Don't create extents for files recovered during mount
	 * if the shrinker is not registered.
	 */
	if (list_empty(&F2FS_I_SB(inode)->s_list))
		return false;

	if (!__init_may_extent_tree(inode, type))
		return false;

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT))
			return false;
		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
				!f2fs_sb_has_readonly(F2FS_I_SB(inode)))
			return false;
	} else if (type == EX_BLOCK_AGE) {
		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
			return false;
		if (file_is_cold(inode))
			return false;
	}
	return true;
}

static void __try_update_largest_extent(struct extent_tree *et,
						struct extent_node *en)
{
	if (et->type != EX_READ)
		return;
	if (en->ei.len <= et->largest.len)
		return;

	et->largest = en->ei;
	et->largest_updated = true;
}

static bool __is_extent_mergeable(struct extent_info *back,
		struct extent_info *front, enum extent_type type)
{
	if (type == EX_READ) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (back->c_len && back->len != back->c_len)
			return false;
		if (front->c_len && front->len != front->c_len)
			return false;
#endif
		return (back->fofs + back->len == front->fofs &&
				back->blk + back->len == front->blk);
	} else if (type == EX_BLOCK_AGE) {
		return (back->fofs + back->len == front->fofs &&
			abs(back->age - front->age) <= SAME_AGE_REGION &&
			abs(back->last_blocks - front->last_blocks) <=
			SAME_AGE_REGION);
	}
	return false;
}

static bool __is_back_mergeable(struct extent_info *cur,
		struct extent_info *back, enum extent_type type)
{
	return __is_extent_mergeable(back, cur, type);
}

static bool __is_front_mergeable(struct extent_info *cur,
		struct extent_info *front, enum extent_type type)
{
	return __is_extent_mergeable(cur, front, type);
}
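
/*
 * Illustration (hypothetical values): two read extents are mergeable only
 * when both the file range and the block range are contiguous, e.g.
 * back = {fofs = 0, len = 4, blk = 100} and front = {fofs = 4, len = 2,
 * blk = 104} can merge into {fofs = 0, len = 6, blk = 100}, while
 * front = {fofs = 4, len = 2, blk = 200} cannot.
 */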
static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
			struct extent_node *cached_en, unsigned int fofs)
{
	struct rb_node *node = root->rb_root.rb_node;
	struct extent_node *en;

	/* check a cached entry */
	if (cached_en && cached_en->ei.fofs <= fofs &&
			cached_en->ei.fofs + cached_en->ei.len > fofs)
		return cached_en;

	/* check rb_tree */
	while (node) {
		en = rb_entry(node, struct extent_node, rb_node);

		if (fofs < en->ei.fofs)
			node = node->rb_left;
		else if (fofs >= en->ei.fofs + en->ei.len)
			node = node->rb_right;
		else
			return en;
	}
	return NULL;
}

/*
 * Look up the rb entry covering @fofs in the rb-tree.
 * If found, return the entry; otherwise, return NULL.
 * @prev_ex: extent before fofs
 * @next_ex: extent after fofs
 * @insert_p: insert point for the new extent at fofs,
 *            in order to simplify the insertion afterwards.
 *            The tree must stay unchanged between lookup and insertion.
 */
static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
				struct extent_node *cached_en,
				unsigned int fofs,
				struct extent_node **prev_entry,
				struct extent_node **next_entry,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent,
				bool *leftmost)
{
	struct rb_node **pnode = &root->rb_root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct extent_node *en = cached_en;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_entry = NULL;
	*next_entry = NULL;

	if (RB_EMPTY_ROOT(&root->rb_root))
		return NULL;

	if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
		goto lookup_neighbors;

	*leftmost = true;

	while (*pnode) {
		parent = *pnode;
		en = rb_entry(*pnode, struct extent_node, rb_node);

		if (fofs < en->ei.fofs) {
			pnode = &(*pnode)->rb_left;
		} else if (fofs >= en->ei.fofs + en->ei.len) {
			pnode = &(*pnode)->rb_right;
			*leftmost = false;
		} else {
			goto lookup_neighbors;
		}
	}

	*insert_p = pnode;
	*insert_parent = parent;

	en = rb_entry(parent, struct extent_node, rb_node);
	tmp_node = parent;
	if (parent && fofs > en->ei.fofs)
		tmp_node = rb_next(parent);
	*next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);

	tmp_node = parent;
	if (parent && fofs < en->ei.fofs)
		tmp_node = rb_prev(parent);
	*prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
	return NULL;

lookup_neighbors:
	if (fofs == en->ei.fofs) {
		/* lookup prev node for merging backward later */
		tmp_node = rb_prev(&en->rb_node);
		*prev_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	if (fofs == en->ei.fofs + en->ei.len - 1) {
		/* lookup next node for merging frontward later */
		tmp_node = rb_next(&en->rb_node);
		*next_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	return en;
}

static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;

static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en;

	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
	if (!en)
		return NULL;

	en->ei = *ei;
	INIT_LIST_HEAD(&en->list);
	en->et = et;

	rb_link_node(&en->rb_node, parent, p);
	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
	atomic_inc(&et->node_cnt);
	atomic_inc(&eti->total_ext_node);
	return en;
}

static void __detach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	rb_erase_cached(&en->rb_node, &et->root);
	atomic_dec(&et->node_cnt);
	atomic_dec(&eti->total_ext_node);

	if (et->cached_en == en)
		et->cached_en = NULL;
	kmem_cache_free(extent_node_slab, en);
}
/*
 * Flow to release an extent_node:
 * 1. list_del_init
 * 2. __detach_extent_node
 * 3. kmem_cache_free.
 */
static void __release_extent_node(struct f2fs_sb_info *sbi,
			struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	spin_lock(&eti->extent_lock);
	f2fs_bug_on(sbi, list_empty(&en->list));
	list_del_init(&en->list);
	spin_unlock(&eti->extent_lock);

	__detach_extent_node(sbi, et, en);
}

static struct extent_tree *__grab_extent_tree(struct inode *inode,
						enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et;
	nid_t ino = inode->i_ino;

	mutex_lock(&eti->extent_tree_lock);
	et = radix_tree_lookup(&eti->extent_tree_root, ino);
	if (!et) {
		et = f2fs_kmem_cache_alloc(extent_tree_slab,
					GFP_NOFS, true, NULL);
		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
		memset(et, 0, sizeof(struct extent_tree));
		et->ino = ino;
		et->type = type;
		et->root = RB_ROOT_CACHED;
		et->cached_en = NULL;
		rwlock_init(&et->lock);
		INIT_LIST_HEAD(&et->list);
		atomic_set(&et->node_cnt, 0);
		atomic_inc(&eti->total_ext_tree);
	} else {
		atomic_dec(&eti->total_zombie_tree);
		list_del_init(&et->list);
	}
	mutex_unlock(&eti->extent_tree_lock);

	/* never dies until evict_inode */
	F2FS_I(inode)->extent_tree[type] = et;

	return et;
}

static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
					struct extent_tree *et)
{
	struct rb_node *node, *next;
	struct extent_node *en;
	unsigned int count = atomic_read(&et->node_cnt);

	node = rb_first_cached(&et->root);
	while (node) {
		next = rb_next(node);
		en = rb_entry(node, struct extent_node, rb_node);
		__release_extent_node(sbi, et, en);
		node = next;
	}

	return count - atomic_read(&et->node_cnt);
}

static void __drop_largest_extent(struct extent_tree *et,
					pgoff_t fofs, unsigned int len)
{
	if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
			fofs + len > et->largest.fofs) {
		et->largest.len = 0;
		et->largest_updated = true;
	}
}
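
/*
 * Illustration (hypothetical values): with largest = {fofs = 8, len = 4},
 * i.e. covering [8, 12), an update of [fofs = 10, len = 4), i.e. [10, 14),
 * overlaps it (10 < 12 && 14 > 8), so the largest extent is dropped;
 * an update of [12, 16) does not.
 */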
void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
	struct extent_tree *et;
	struct extent_node *en;
	struct extent_info ei;

	if (!__may_extent_tree(inode, EX_READ)) {
		/* drop largest read extent */
		if (i_ext->len) {
			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
			i_ext->len = 0;
			set_page_dirty(ipage);
		}
		set_inode_flag(inode, FI_NO_EXTENT);
		return;
	}

	et = __grab_extent_tree(inode, EX_READ);

	get_read_extent_info(&ei, i_ext);

	write_lock(&et->lock);
	if (atomic_read(&et->node_cnt) || !ei.len)
		goto skip;

	en = __attach_extent_node(sbi, et, &ei, NULL,
				&et->root.rb_root.rb_node, true);
	if (en) {
		et->largest = en->ei;
		et->cached_en = en;

		spin_lock(&eti->extent_lock);
		list_add_tail(&en->list, &eti->extent_list);
		spin_unlock(&eti->extent_lock);
	}
skip:
	/* Drop the largest extent, if the checkpoint got corrupted. */
	if (f2fs_cp_error(sbi)) {
		et->largest.len = 0;
		et->largest_updated = true;
	}
	write_unlock(&et->lock);
}

void f2fs_init_age_extent_tree(struct inode *inode)
{
	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
		return;
	__grab_extent_tree(inode, EX_BLOCK_AGE);
}

void f2fs_init_extent_tree(struct inode *inode)
{
	/* initialize read cache */
	if (__init_may_extent_tree(inode, EX_READ))
		__grab_extent_tree(inode, EX_READ);

	/* initialize block age cache */
	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
		__grab_extent_tree(inode, EX_BLOCK_AGE);
}

static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
			struct extent_info *ei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en;
	bool ret = false;

	if (!et)
		return false;

	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);

	read_lock(&et->lock);

	if (type == EX_READ &&
			et->largest.fofs <= pgofs &&
			(pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
		*ei = et->largest;
		ret = true;
		stat_inc_largest_node_hit(sbi);
		goto out;
	}

	en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
	if (!en)
		goto out;

	if (en == et->cached_en)
		stat_inc_cached_node_hit(sbi, type);
	else
		stat_inc_rbtree_node_hit(sbi, type);

	*ei = en->ei;
	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	ret = true;
out:
	stat_inc_total_hit(sbi, type);
	read_unlock(&et->lock);

	if (type == EX_READ)
		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
	return ret;
}

static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct extent_node *prev_ex,
				struct extent_node *next_ex)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en = NULL;

	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
		prev_ex->ei.len += ei->len;
		ei = &prev_ex->ei;
		en = prev_ex;
	}

	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
		next_ex->ei.fofs = ei->fofs;
		next_ex->ei.len += ei->len;
		if (et->type == EX_READ)
			next_ex->ei.blk = ei->blk;
		if (en)
			__release_extent_node(sbi, et, prev_ex);

		en = next_ex;
	}

	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	return en;
}
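
/*
 * Illustration (hypothetical values): merging ei = {fofs = 10, len = 2}
 * with a back-mergeable prev_ex = {fofs = 6, len = 4} first extends
 * prev_ex to {fofs = 6, len = 6}; if next_ex = {fofs = 12, len = 3} is
 * then front-mergeable as well, prev_ex is released and next_ex absorbs
 * the whole range as {fofs = 6, len = 9}.
 */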
static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node **insert_p,
				struct rb_node *insert_parent,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct rb_node **p = &et->root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_node *en = NULL;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	leftmost = true;

	/* look up extent_node in the rb tree */
	while (*p) {
		parent = *p;
		en = rb_entry(parent, struct extent_node, rb_node);

		if (ei->fofs < en->ei.fofs) {
			p = &(*p)->rb_left;
		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			f2fs_bug_on(sbi, 1);
		}
	}

do_insert:
	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	/* update in global extent list */
	spin_lock(&eti->extent_lock);
	list_add_tail(&en->list, &eti->extent_list);
	et->cached_en = en;
	spin_unlock(&eti->extent_lock);
	return en;
}

static void __update_extent_tree_range(struct inode *inode,
			struct extent_info *tei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en = NULL, *en1 = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei, dei, prev;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int fofs = tei->fofs, len = tei->len;
	unsigned int end = fofs + len;
	bool updated = false;
	bool leftmost = false;

	if (!et)
		return;

	if (type == EX_READ)
		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
						tei->blk, 0);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
						tei->age, tei->last_blocks);

	write_lock(&et->lock);

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
			write_unlock(&et->lock);
			return;
		}

		prev = et->largest;
		dei.len = 0;

		/*
		 * drop the largest extent before lookup, in case it has
		 * already been shrunk from the extent tree
		 */
		__drop_largest_extent(et, fofs, len);
	}

	/* 1. lookup the first extent node in range [fofs, fofs + len - 1] */
	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (!en)
		en = next_en;
	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
	while (en && en->ei.fofs < end) {
		unsigned int org_end;
		int parts = 0;	/* # of parts current extent split into */

		next_en = en1 = NULL;

		dei = en->ei;
		org_end = dei.fofs + dei.len;
		f2fs_bug_on(sbi, fofs >= org_end);

		if (fofs > dei.fofs && (type != EX_READ ||
				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
			en->ei.len = fofs - en->ei.fofs;
			prev_en = en;
			parts = 1;
		}

		if (end < org_end && (type != EX_READ ||
				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
			if (parts) {
				__set_extent_info(&ei,
					end, org_end - end,
					end - dei.fofs + dei.blk, false,
					dei.age, dei.last_blocks,
					type);
				en1 = __insert_extent_tree(sbi, et, &ei,
							NULL, NULL, true);
				next_en = en1;
			} else {
				__set_extent_info(&en->ei,
					end, en->ei.len - (end - dei.fofs),
					en->ei.blk + (end - dei.fofs), true,
					dei.age, dei.last_blocks,
					type);
				next_en = en;
			}
			parts++;
		}

		if (!next_en) {
			struct rb_node *node = rb_next(&en->rb_node);

			next_en = rb_entry_safe(node, struct extent_node,
						rb_node);
		}

		if (parts)
			__try_update_largest_extent(et, en);
		else
			__release_extent_node(sbi, et, en);

		/*
		 * If the original extent is split into zero or two parts,
		 * the extent tree has been altered by deletion or insertion,
		 * so invalidate the cached insertion pointers into the tree.
		 */
		if (parts != 1) {
			insert_p = NULL;
			insert_parent = NULL;
		}
		en = next_en;
	}

	if (type == EX_BLOCK_AGE)
		goto update_age_extent_cache;

	/* 3. update the extent in the read extent cache */
	BUG_ON(type != EX_READ);

	if (tei->blk) {
		__set_extent_info(&ei, fofs, len, tei->blk, false,
				0, 0, EX_READ);
		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
			__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);

		/* give up extent_cache, if split and small updates happen */
		if (dei.len >= 1 &&
				prev.len < F2FS_MIN_EXTENT_LEN &&
				et->largest.len < F2FS_MIN_EXTENT_LEN) {
			et->largest.len = 0;
			et->largest_updated = true;
			set_inode_flag(inode, FI_NO_EXTENT);
		}
	}

	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		__free_extent_tree(sbi, et);

	if (et->largest_updated) {
		et->largest_updated = false;
		updated = true;
	}
	goto out_read_extent_cache;
update_age_extent_cache:
	if (!tei->last_blocks)
		goto out_read_extent_cache;

	__set_extent_info(&ei, fofs, len, 0, false,
			tei->age, tei->last_blocks, EX_BLOCK_AGE);
	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);
out_read_extent_cache:
	write_unlock(&et->lock);

	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}
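
/*
 * Illustration (hypothetical values): updating the range [fofs = 100,
 * len = 4) against an existing read extent {fofs = 0, len = 256,
 * blk = 1000} trims the node in place to the left part {0, 100, 1000}
 * and inserts the right part {104, 152, 1104} as a new node (both
 * remainders here are assumed to satisfy F2FS_MIN_EXTENT_LEN); step 3
 * then installs the new extent {100, 4, tei->blk} in between.
 */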
#ifdef CONFIG_F2FS_FS_COMPRESSION
void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
				pgoff_t fofs, block_t blkaddr, unsigned int llen,
				unsigned int c_len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
	struct extent_node *en = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	bool leftmost = false;

	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
						blkaddr, c_len);

	/* it is safe to check FI_NO_EXTENT w/o et->lock on a read-only image */
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return;

	write_lock(&et->lock);

	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (en)
		goto unlock_out;

	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
	ei.c_len = c_len;

	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
				insert_p, insert_parent, leftmost);
unlock_out:
	write_unlock(&et->lock);
}
#endif

static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
						unsigned long long new,
						unsigned long long old)
{
	unsigned int rem_old, rem_new;
	unsigned long long res;
	unsigned int weight = sbi->last_age_weight;

	res = div_u64_rem(new, 100, &rem_new) * (100 - weight)
		+ div_u64_rem(old, 100, &rem_old) * weight;

	if (rem_new)
		res += rem_new * (100 - weight) / 100;
	if (rem_old)
		res += rem_old * weight / 100;

	return res;
}
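
/*
 * Illustration (hypothetical values): the result approximates
 * (new * (100 - weight) + old * weight) / 100 without 64-bit overflow.
 * With weight = 30, new = 150 and old = 250, the quotients contribute
 * 1 * 70 + 2 * 30 = 130 and the remainders add 50 * 70 / 100 +
 * 50 * 30 / 100 = 35 + 15 = 50, i.e. a block age of 180, which matches
 * (150 * 70 + 250 * 30) / 100 exactly.
 */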
/* This returns a new age and the allocated blocks in ei */
static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
						block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t f_size = i_size_read(inode);
	unsigned long long cur_blocks =
				atomic64_read(&sbi->allocated_data_blocks);
	struct extent_info tei = *ei;	/* only fofs and len are valid */

	/*
	 * When I/O is not aligned to PAGE_SIZE, an update happens on the
	 * last file block even in a sequential write. So don't record the
	 * age of the new last file block here.
	 */
	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
			blkaddr == NEW_ADDR)
		return -EINVAL;

	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
		unsigned long long cur_age;

		if (cur_blocks >= tei.last_blocks)
			cur_age = cur_blocks - tei.last_blocks;
		else
			/* allocated_data_blocks overflow */
			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;

		if (tei.age)
			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
		else
			ei->age = cur_age;
		ei->last_blocks = cur_blocks;
		WARN_ON(ei->age > cur_blocks);
		return 0;
	}

	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);

	/* the data block was allocated for the first time */
	if (blkaddr == NEW_ADDR)
		goto out;

	if (__is_valid_data_blkaddr(blkaddr) &&
	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
		return -EINVAL;
out:
	/*
	 * Init the block age with zero. This can happen when the block-age
	 * extent was reclaimed due to memory pressure or a system reboot.
	 */
	ei->age = 0;
	ei->last_blocks = cur_blocks;
	return 0;
}

static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
{
	struct extent_info ei = {};

	if (!__may_extent_tree(dn->inode, type))
		return;

	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
								dn->ofs_in_node;
	ei.len = 1;

	if (type == EX_READ) {
		if (dn->data_blkaddr == NEW_ADDR)
			ei.blk = NULL_ADDR;
		else
			ei.blk = dn->data_blkaddr;
	} else if (type == EX_BLOCK_AGE) {
		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
			return;
	}
	__update_extent_tree_range(dn->inode, &ei, type);
}

static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
					enum extent_type type)
{
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et, *next;
	struct extent_node *en;
	unsigned int node_cnt = 0, tree_cnt = 0;
	int remained;

	if (!atomic_read(&eti->total_zombie_tree))
		goto free_node;

	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	/* 1. remove unreferenced extent trees */
	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
		if (atomic_read(&et->node_cnt)) {
			write_lock(&et->lock);
			node_cnt += __free_extent_tree(sbi, et);
			write_unlock(&et->lock);
		}
		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
		list_del_init(&et->list);
		radix_tree_delete(&eti->extent_tree_root, et->ino);
		kmem_cache_free(extent_tree_slab, et);
		atomic_dec(&eti->total_ext_tree);
		atomic_dec(&eti->total_zombie_tree);
		tree_cnt++;

		if (node_cnt + tree_cnt >= nr_shrink)
			goto unlock_out;
		cond_resched();
	}
	mutex_unlock(&eti->extent_tree_lock);

free_node:
	/* 2. remove LRU extent entries */
	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	remained = nr_shrink - (node_cnt + tree_cnt);

	spin_lock(&eti->extent_lock);
	for (; remained > 0; remained--) {
		if (list_empty(&eti->extent_list))
			break;
		en = list_first_entry(&eti->extent_list,
					struct extent_node, list);
		et = en->et;
		if (!write_trylock(&et->lock)) {
			/* refresh this extent node's position in the extent list */
			list_move_tail(&en->list, &eti->extent_list);
			continue;
		}

		list_del_init(&en->list);
		spin_unlock(&eti->extent_lock);

		__detach_extent_node(sbi, et, en);

		write_unlock(&et->lock);
		node_cnt++;
		spin_lock(&eti->extent_lock);
	}
	spin_unlock(&eti->extent_lock);

unlock_out:
	mutex_unlock(&eti->extent_tree_lock);
out:
	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);

	return node_cnt + tree_cnt;
}

/* read extent cache operations */
bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_READ))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
}

bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
				block_t *blkaddr)
{
	struct extent_info ei = {};

	if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
		return false;
	*blkaddr = ei.blk + index - ei.fofs;
	return true;
}

void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_READ);
}

void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, block_t blkaddr, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
		.blk = blkaddr,
	};

	if (!__may_extent_tree(dn->inode, EX_READ))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_READ);
}

unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, READ_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
}
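
/*
 * Illustration (hypothetical values): a hit on a cached read extent
 * {fofs = 8, len = 4, blk = 100} resolves index 10 without walking the
 * node tree again, since f2fs_lookup_read_extent_cache_block() returns
 * ei.blk + index - ei.fofs = 100 + 10 - 8 = 102.
 */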
/* block age extent cache operations */
bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
	};

	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
}

unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, AGE_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
}

static unsigned int __destroy_extent_node(struct inode *inode,
					enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et || !atomic_read(&et->node_cnt))
		return 0;

	write_lock(&et->lock);
	node_cnt = __free_extent_tree(sbi, et);
	write_unlock(&et->lock);

	return node_cnt;
}

void f2fs_destroy_extent_node(struct inode *inode)
{
	__destroy_extent_node(inode, EX_READ);
	__destroy_extent_node(inode, EX_BLOCK_AGE);
}

static void __drop_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	bool updated = false;

	if (!__may_extent_tree(inode, type))
		return;

	write_lock(&et->lock);
	__free_extent_tree(sbi, et);
	if (type == EX_READ) {
		set_inode_flag(inode, FI_NO_EXTENT);
		if (et->largest.len) {
			et->largest.len = 0;
			updated = true;
		}
	}
	write_unlock(&et->lock);
	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}

void f2fs_drop_extent_tree(struct inode *inode)
{
	__drop_extent_tree(inode, EX_READ);
	__drop_extent_tree(inode, EX_BLOCK_AGE);
}

static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et)
		return;

	if (inode->i_nlink && !is_bad_inode(inode) &&
					atomic_read(&et->node_cnt)) {
		mutex_lock(&eti->extent_tree_lock);
		list_add_tail(&et->list, &eti->zombie_list);
		atomic_inc(&eti->total_zombie_tree);
		mutex_unlock(&eti->extent_tree_lock);
		return;
	}

	/* free all extent info belonging to this extent tree */
	node_cnt = __destroy_extent_node(inode, type);

	/* delete the extent tree entry in the radix tree */
	mutex_lock(&eti->extent_tree_lock);
	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
	kmem_cache_free(extent_tree_slab, et);
	atomic_dec(&eti->total_ext_tree);
	mutex_unlock(&eti->extent_tree_lock);

	F2FS_I(inode)->extent_tree[type] = NULL;

	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
}
void f2fs_destroy_extent_tree(struct inode *inode)
{
	__destroy_extent_tree(inode, EX_READ);
	__destroy_extent_tree(inode, EX_BLOCK_AGE);
}

static void __init_extent_tree_info(struct extent_tree_info *eti)
{
	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
	mutex_init(&eti->extent_tree_lock);
	INIT_LIST_HEAD(&eti->extent_list);
	spin_lock_init(&eti->extent_lock);
	atomic_set(&eti->total_ext_tree, 0);
	INIT_LIST_HEAD(&eti->zombie_list);
	atomic_set(&eti->total_zombie_tree, 0);
	atomic_set(&eti->total_ext_node, 0);
}

void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
{
	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);

	/* initialize for block age extents */
	atomic64_set(&sbi->allocated_data_blocks, 0);
	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
	sbi->last_age_weight = LAST_AGE_WEIGHT;
}

int __init f2fs_create_extent_cache(void)
{
	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
			sizeof(struct extent_tree));
	if (!extent_tree_slab)
		return -ENOMEM;
	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
			sizeof(struct extent_node));
	if (!extent_node_slab) {
		kmem_cache_destroy(extent_tree_slab);
		return -ENOMEM;
	}
	return 0;
}

void f2fs_destroy_extent_cache(void)
{
	kmem_cache_destroy(extent_node_slab);
	kmem_cache_destroy(extent_tree_slab);
}