// SPDX-License-Identifier: GPL-2.0
/*
 * f2fs extent cache support
 *
 * Copyright (c) 2015 Motorola Mobility
 * Copyright (c) 2015 Samsung Electronics
 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
 *          Chao Yu <chao2.yu@samsung.com>
 *
 * block_age-based extent cache added by:
 * Copyright (c) 2022 xiaomi Co., Ltd.
 *             http://www.xiaomi.com/
 */

#include <linux/fs.h>
#include <linux/f2fs_fs.h>

#include "f2fs.h"
#include "node.h"
#include <trace/events/f2fs.h>

bool sanity_check_extent_cache(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct extent_tree *et = fi->extent_tree[EX_READ];
	struct extent_info *ei;

	if (!et)
		return true;

	ei = &et->largest;
	if (!ei->len)
		return true;

	/* Drop the largest extent if the checkpoint is corrupted. */
	if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) {
		ei->len = 0;
		et->largest_updated = true;
		return true;
	}

	if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) ||
	    !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
					DATA_GENERIC_ENHANCE)) {
		f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
			  __func__, inode->i_ino,
			  ei->blk, ei->fofs, ei->len);
		return false;
	}
	return true;
}

static void __set_extent_info(struct extent_info *ei,
				unsigned int fofs, unsigned int len,
				block_t blk, bool keep_clen,
				unsigned long age, unsigned long last_blocks,
				enum extent_type type)
{
	ei->fofs = fofs;
	ei->len = len;

	if (type == EX_READ) {
		ei->blk = blk;
		if (keep_clen)
			return;
#ifdef CONFIG_F2FS_FS_COMPRESSION
		ei->c_len = 0;
#endif
	} else if (type == EX_BLOCK_AGE) {
		ei->age = age;
		ei->last_blocks = last_blocks;
	}
}
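
/*
 * For clarity: when @keep_clen is true (EX_READ only), the existing
 * ei->c_len of the target extent_info is left untouched, e.g. when an
 * extent over a compressed cluster is trimmed in place; passing false
 * resets c_len so a freshly built extent does not inherit a stale
 * compressed-cluster length.
 */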

static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
{
	if (type == EX_READ)
		return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
			S_ISREG(inode->i_mode);
	if (type == EX_BLOCK_AGE)
		return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
			(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
	return false;
}

static bool __may_extent_tree(struct inode *inode, enum extent_type type)
{
	/*
	 * For files recovered during mount, do not create extents
	 * if the shrinker is not registered.
	 */
	if (list_empty(&F2FS_I_SB(inode)->s_list))
		return false;

	if (!__init_may_extent_tree(inode, type))
		return false;

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT))
			return false;
		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
		    !f2fs_sb_has_readonly(F2FS_I_SB(inode)))
			return false;
	} else if (type == EX_BLOCK_AGE) {
		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
			return false;
		if (file_is_cold(inode))
			return false;
	}
	return true;
}

static void __try_update_largest_extent(struct extent_tree *et,
					struct extent_node *en)
{
	if (et->type != EX_READ)
		return;
	if (en->ei.len <= et->largest.len)
		return;

	et->largest = en->ei;
	et->largest_updated = true;
}

static bool __is_extent_mergeable(struct extent_info *back,
		struct extent_info *front, enum extent_type type)
{
	if (type == EX_READ) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (back->c_len && back->len != back->c_len)
			return false;
		if (front->c_len && front->len != front->c_len)
			return false;
#endif
		return (back->fofs + back->len == front->fofs &&
				back->blk + back->len == front->blk);
	} else if (type == EX_BLOCK_AGE) {
		return (back->fofs + back->len == front->fofs &&
			abs(back->age - front->age) <= SAME_AGE_REGION &&
			abs(back->last_blocks - front->last_blocks) <=
							SAME_AGE_REGION);
	}
	return false;
}

static bool __is_back_mergeable(struct extent_info *cur,
		struct extent_info *back, enum extent_type type)
{
	return __is_extent_mergeable(back, cur, type);
}

static bool __is_front_mergeable(struct extent_info *cur,
		struct extent_info *front, enum extent_type type)
{
	return __is_extent_mergeable(cur, front, type);
}
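
/*
 * Illustration (hypothetical values): for EX_READ, back = {fofs=0,
 * len=4, blk=100} and front = {fofs=4, len=4, blk=104} are mergeable
 * because both the file offsets and the block addresses are
 * contiguous; merging yields {fofs=0, len=8, blk=100}.  For
 * EX_BLOCK_AGE only file-offset adjacency is required, plus age and
 * last_blocks each within SAME_AGE_REGION of the neighbour's.
 */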

static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
			struct extent_node *cached_en, unsigned int fofs)
{
	struct rb_node *node = root->rb_root.rb_node;
	struct extent_node *en;

	/* check a cached entry */
	if (cached_en && cached_en->ei.fofs <= fofs &&
			cached_en->ei.fofs + cached_en->ei.len > fofs)
		return cached_en;

	/* check rb_tree */
	while (node) {
		en = rb_entry(node, struct extent_node, rb_node);

		if (fofs < en->ei.fofs)
			node = node->rb_left;
		else if (fofs >= en->ei.fofs + en->ei.len)
			node = node->rb_right;
		else
			return en;
	}
	return NULL;
}

/*
 * Look up the rb entry covering @fofs in the rb-tree; if hit, return
 * the entry, otherwise return NULL.
 * @prev_entry: extent before fofs
 * @next_entry: extent after fofs
 * @insert_p: insert point for a new extent at fofs,
 *            to simplify the insertion afterwards.
 * The tree must stay unchanged between lookup and insertion.
 */
static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
				struct extent_node *cached_en,
				unsigned int fofs,
				struct extent_node **prev_entry,
				struct extent_node **next_entry,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent,
				bool *leftmost)
{
	struct rb_node **pnode = &root->rb_root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct extent_node *en = cached_en;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_entry = NULL;
	*next_entry = NULL;

	if (RB_EMPTY_ROOT(&root->rb_root))
		return NULL;

	if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
		goto lookup_neighbors;

	*leftmost = true;

	while (*pnode) {
		parent = *pnode;
		en = rb_entry(*pnode, struct extent_node, rb_node);

		if (fofs < en->ei.fofs) {
			pnode = &(*pnode)->rb_left;
		} else if (fofs >= en->ei.fofs + en->ei.len) {
			pnode = &(*pnode)->rb_right;
			*leftmost = false;
		} else {
			goto lookup_neighbors;
		}
	}

	*insert_p = pnode;
	*insert_parent = parent;

	en = rb_entry(parent, struct extent_node, rb_node);
	tmp_node = parent;
	if (parent && fofs > en->ei.fofs)
		tmp_node = rb_next(parent);
	*next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);

	tmp_node = parent;
	if (parent && fofs < en->ei.fofs)
		tmp_node = rb_prev(parent);
	*prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
	return NULL;

lookup_neighbors:
	if (fofs == en->ei.fofs) {
		/* lookup prev node for merging backward later */
		tmp_node = rb_prev(&en->rb_node);
		*prev_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	if (fofs == en->ei.fofs + en->ei.len - 1) {
		/* lookup next node for merging frontward later */
		tmp_node = rb_next(&en->rb_node);
		*next_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	return en;
}
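
/*
 * Example (hypothetical tree): with extents [0,2) and [8,12) cached,
 * a lookup of fofs=5 misses and returns NULL, while *prev_entry points
 * to [0,2), *next_entry to [8,12), and *insert_p/*insert_parent record
 * the slot where a node covering fofs=5 can be linked without another
 * tree walk.
 */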

static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;

static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en;

	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
	if (!en)
		return NULL;

	en->ei = *ei;
	INIT_LIST_HEAD(&en->list);
	en->et = et;

	rb_link_node(&en->rb_node, parent, p);
	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
	atomic_inc(&et->node_cnt);
	atomic_inc(&eti->total_ext_node);
	return en;
}

static void __detach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	rb_erase_cached(&en->rb_node, &et->root);
	atomic_dec(&et->node_cnt);
	atomic_dec(&eti->total_ext_node);

	if (et->cached_en == en)
		et->cached_en = NULL;
	kmem_cache_free(extent_node_slab, en);
}

/*
 * Flow to release an extent_node:
 * 1. list_del_init
 * 2. __detach_extent_node
 * 3. kmem_cache_free.
 */
static void __release_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	spin_lock(&eti->extent_lock);
	f2fs_bug_on(sbi, list_empty(&en->list));
	list_del_init(&en->list);
	spin_unlock(&eti->extent_lock);

	__detach_extent_node(sbi, et, en);
}

static struct extent_tree *__grab_extent_tree(struct inode *inode,
						enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et;
	nid_t ino = inode->i_ino;

	mutex_lock(&eti->extent_tree_lock);
	et = radix_tree_lookup(&eti->extent_tree_root, ino);
	if (!et) {
		et = f2fs_kmem_cache_alloc(extent_tree_slab,
					GFP_NOFS, true, NULL);
		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
		memset(et, 0, sizeof(struct extent_tree));
		et->ino = ino;
		et->type = type;
		et->root = RB_ROOT_CACHED;
		et->cached_en = NULL;
		rwlock_init(&et->lock);
		INIT_LIST_HEAD(&et->list);
		atomic_set(&et->node_cnt, 0);
		atomic_inc(&eti->total_ext_tree);
	} else {
		atomic_dec(&eti->total_zombie_tree);
		list_del_init(&et->list);
	}
	mutex_unlock(&eti->extent_tree_lock);

	/* never dies until evict_inode */
	F2FS_I(inode)->extent_tree[type] = et;

	return et;
}

static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
					struct extent_tree *et)
{
	struct rb_node *node, *next;
	struct extent_node *en;
	unsigned int count = atomic_read(&et->node_cnt);

	node = rb_first_cached(&et->root);
	while (node) {
		next = rb_next(node);
		en = rb_entry(node, struct extent_node, rb_node);
		__release_extent_node(sbi, et, en);
		node = next;
	}

	return count - atomic_read(&et->node_cnt);
}

static void __drop_largest_extent(struct extent_tree *et,
					pgoff_t fofs, unsigned int len)
{
	if (fofs < et->largest.fofs + et->largest.len &&
			fofs + len > et->largest.fofs) {
		et->largest.len = 0;
		et->largest_updated = true;
	}
}
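
/*
 * The condition above is the standard half-open interval overlap test:
 * [fofs, fofs + len) intersects [largest.fofs, largest.fofs +
 * largest.len).  E.g. (hypothetical values) with largest = {fofs=10,
 * len=5}, an update at fofs=14, len=4 overlaps (14 < 15 && 18 > 10)
 * and invalidates it, while one at fofs=15 would not.
 */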

void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
	struct extent_tree *et;
	struct extent_node *en;
	struct extent_info ei;

	if (!__may_extent_tree(inode, EX_READ)) {
		/* drop largest read extent */
		if (i_ext && i_ext->len) {
			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
			i_ext->len = 0;
			set_page_dirty(ipage);
		}
		goto out;
	}

	et = __grab_extent_tree(inode, EX_READ);

	if (!i_ext || !i_ext->len)
		goto out;

	get_read_extent_info(&ei, i_ext);

	write_lock(&et->lock);
	if (atomic_read(&et->node_cnt))
		goto unlock_out;

	en = __attach_extent_node(sbi, et, &ei, NULL,
				&et->root.rb_root.rb_node, true);
	if (en) {
		et->largest = en->ei;
		et->cached_en = en;

		spin_lock(&eti->extent_lock);
		list_add_tail(&en->list, &eti->extent_list);
		spin_unlock(&eti->extent_lock);
	}
unlock_out:
	write_unlock(&et->lock);
out:
	if (!F2FS_I(inode)->extent_tree[EX_READ])
		set_inode_flag(inode, FI_NO_EXTENT);
}

void f2fs_init_age_extent_tree(struct inode *inode)
{
	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
		return;
	__grab_extent_tree(inode, EX_BLOCK_AGE);
}

void f2fs_init_extent_tree(struct inode *inode)
{
	/* initialize read cache */
	if (__init_may_extent_tree(inode, EX_READ))
		__grab_extent_tree(inode, EX_READ);

	/* initialize block age cache */
	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
		__grab_extent_tree(inode, EX_BLOCK_AGE);
}

static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
			struct extent_info *ei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en;
	bool ret = false;

	if (!et)
		return false;

	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);

	read_lock(&et->lock);

	if (type == EX_READ &&
			et->largest.fofs <= pgofs &&
			et->largest.fofs + et->largest.len > pgofs) {
		*ei = et->largest;
		ret = true;
		stat_inc_largest_node_hit(sbi);
		goto out;
	}

	en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
	if (!en)
		goto out;

	if (en == et->cached_en)
		stat_inc_cached_node_hit(sbi, type);
	else
		stat_inc_rbtree_node_hit(sbi, type);

	*ei = en->ei;
	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	ret = true;
out:
	stat_inc_total_hit(sbi, type);
	read_unlock(&et->lock);

	if (type == EX_READ)
		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
	return ret;
}
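
/*
 * Note on the lookup order above: a read-extent query is served first
 * from the inode's single "largest" extent, then from the per-tree
 * cached node, and finally by an rb-tree walk; the stat_inc_*_hit()
 * counters record which level satisfied the hit.
 */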

static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct extent_node *prev_ex,
				struct extent_node *next_ex)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en = NULL;

	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
		prev_ex->ei.len += ei->len;
		ei = &prev_ex->ei;
		en = prev_ex;
	}

	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
		next_ex->ei.fofs = ei->fofs;
		next_ex->ei.len += ei->len;
		if (et->type == EX_READ)
			next_ex->ei.blk = ei->blk;
		if (en)
			__release_extent_node(sbi, et, prev_ex);

		en = next_ex;
	}

	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	return en;
}

static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node **insert_p,
				struct rb_node *insert_parent,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct rb_node **p = &et->root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_node *en = NULL;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	leftmost = true;

	/* look up extent_node in the rb tree */
	while (*p) {
		parent = *p;
		en = rb_entry(parent, struct extent_node, rb_node);

		if (ei->fofs < en->ei.fofs) {
			p = &(*p)->rb_left;
		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			f2fs_bug_on(sbi, 1);
		}
	}

do_insert:
	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	/* update in global extent list */
	spin_lock(&eti->extent_lock);
	list_add_tail(&en->list, &eti->extent_list);
	et->cached_en = en;
	spin_unlock(&eti->extent_lock);
	return en;
}

static void __update_extent_tree_range(struct inode *inode,
			struct extent_info *tei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en = NULL, *en1 = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei, dei, prev;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int fofs = tei->fofs, len = tei->len;
	unsigned int end = fofs + len;
	bool updated = false;
	bool leftmost = false;

	if (!et)
		return;

	if (type == EX_READ)
		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
						tei->blk, 0);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
						tei->age, tei->last_blocks);

	write_lock(&et->lock);

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
			write_unlock(&et->lock);
			return;
		}

		prev = et->largest;
		dei.len = 0;

		/*
		 * drop largest extent before lookup, in case it's already
		 * been shrunk from extent tree
		 */
		__drop_largest_extent(et, fofs, len);
	}

	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (!en)
		en = next_en;

	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
	while (en && en->ei.fofs < end) {
		unsigned int org_end;
		int parts = 0;	/* # of parts current extent split into */

		next_en = en1 = NULL;

		dei = en->ei;
		org_end = dei.fofs + dei.len;
		f2fs_bug_on(sbi, fofs >= org_end);

		if (fofs > dei.fofs && (type != EX_READ ||
				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
			en->ei.len = fofs - en->ei.fofs;
			prev_en = en;
			parts = 1;
		}

		if (end < org_end && (type != EX_READ ||
				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
			if (parts) {
				__set_extent_info(&ei,
					end, org_end - end,
					end - dei.fofs + dei.blk, false,
					dei.age, dei.last_blocks,
					type);
				en1 = __insert_extent_tree(sbi, et, &ei,
							NULL, NULL, true);
				next_en = en1;
			} else {
				__set_extent_info(&en->ei,
					end, en->ei.len - (end - dei.fofs),
					en->ei.blk + (end - dei.fofs), true,
					dei.age, dei.last_blocks,
					type);
				next_en = en;
			}
			parts++;
		}

		if (!next_en) {
			struct rb_node *node = rb_next(&en->rb_node);

			next_en = rb_entry_safe(node, struct extent_node,
						rb_node);
		}

		if (parts)
			__try_update_largest_extent(et, en);
		else
			__release_extent_node(sbi, et, en);

		/*
		 * If the original extent is split into zero or two parts,
		 * the tree has been altered by deletion or insertion,
		 * therefore invalidate the precomputed pointers into it.
		 */
		if (parts != 1) {
			insert_p = NULL;
			insert_parent = NULL;
		}
		en = next_en;
	}

	if (type == EX_BLOCK_AGE)
		goto update_age_extent_cache;

	/* 3. update extent in read extent cache */
	BUG_ON(type != EX_READ);

	if (tei->blk) {
		__set_extent_info(&ei, fofs, len, tei->blk, false,
				  0, 0, EX_READ);
		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
			__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);

		/* give up extent_cache, if split and small updates happen */
		if (dei.len >= 1 &&
				prev.len < F2FS_MIN_EXTENT_LEN &&
				et->largest.len < F2FS_MIN_EXTENT_LEN) {
			et->largest.len = 0;
			et->largest_updated = true;
			set_inode_flag(inode, FI_NO_EXTENT);
		}
	}

	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		__free_extent_tree(sbi, et);

	if (et->largest_updated) {
		et->largest_updated = false;
		updated = true;
	}
	goto out_read_extent_cache;
update_age_extent_cache:
	if (!tei->last_blocks)
		goto out_read_extent_cache;

	__set_extent_info(&ei, fofs, len, 0, false,
			tei->age, tei->last_blocks, EX_BLOCK_AGE);
	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);
out_read_extent_cache:
	write_unlock(&et->lock);

	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}
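
/*
 * Example of the invalidation step above (hypothetical read extent,
 * with F2FS_MIN_EXTENT_LEN taken as 64): updating range [100, 120)
 * against a cached extent {fofs=0, len=256, blk=1000} trims it in
 * place to {fofs=0, len=100, blk=1000} (parts = 1) and inserts the
 * right remainder {fofs=120, len=136, blk=1120} as a new node
 * (parts = 2); since parts != 1, the precomputed insert position is
 * discarded before step 3 re-inserts the updated range.
 */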

#ifdef CONFIG_F2FS_FS_COMPRESSION
void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
				pgoff_t fofs, block_t blkaddr, unsigned int llen,
				unsigned int c_len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
	struct extent_node *en = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	bool leftmost = false;

	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
						blkaddr, c_len);

	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return;

	write_lock(&et->lock);

	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (en)
		goto unlock_out;

	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
	ei.c_len = c_len;

	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
				insert_p, insert_parent, leftmost);
unlock_out:
	write_unlock(&et->lock);
}
#endif

static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
						unsigned long long new,
						unsigned long long old)
{
	unsigned int rem_old, rem_new;
	unsigned long long res;
	unsigned int weight = sbi->last_age_weight;

	res = div_u64_rem(new, 100, &rem_new) * (100 - weight)
		+ div_u64_rem(old, 100, &rem_old) * weight;

	if (rem_new)
		res += rem_new * (100 - weight) / 100;
	if (rem_old)
		res += rem_old * weight / 100;

	return res;
}
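
/*
 * The helper above computes the weighted average
 *	age = new * (100 - weight) / 100 + old * weight / 100
 * in two steps (quotients first, then remainders) so the 64-bit
 * intermediates cannot overflow.  Worked example, assuming a
 * hypothetical last_age_weight of 30 and new = 250, old = 150:
 * quotient part 2 * 70 + 1 * 30 = 170, remainder part
 * 50 * 70 / 100 + 50 * 30 / 100 = 35 + 15 = 50, giving 220, the same
 * result as (250 * 70 + 150 * 30) / 100.
 */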

/* This returns a new age and allocated blocks in ei */
static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
				block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t f_size = i_size_read(inode);
	unsigned long long cur_blocks =
				atomic64_read(&sbi->allocated_data_blocks);
	struct extent_info tei = *ei;	/* only fofs and len are valid */

	/*
	 * When I/O is not aligned to PAGE_SIZE, an update can hit the last
	 * file block even during sequential writes, so don't record an age
	 * for the file's new last block here.
	 */
	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
			blkaddr == NEW_ADDR)
		return -EINVAL;

	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
		unsigned long long cur_age;

		if (cur_blocks >= tei.last_blocks)
			cur_age = cur_blocks - tei.last_blocks;
		else
			/* allocated_data_blocks overflow */
			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;

		if (tei.age)
			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
		else
			ei->age = cur_age;
		ei->last_blocks = cur_blocks;
		WARN_ON(ei->age > cur_blocks);
		return 0;
	}

	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);

	/* the data block was allocated for the first time */
	if (blkaddr == NEW_ADDR)
		goto out;

	if (__is_valid_data_blkaddr(blkaddr) &&
	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
		return -EINVAL;
out:
	/*
	 * init block age with zero, this can happen when the block age extent
	 * was reclaimed due to memory constraint or system reboot
	 */
	ei->age = 0;
	ei->last_blocks = cur_blocks;
	return 0;
}

static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
{
	struct extent_info ei = {};

	if (!__may_extent_tree(dn->inode, type))
		return;

	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
								dn->ofs_in_node;
	ei.len = 1;

	if (type == EX_READ) {
		if (dn->data_blkaddr == NEW_ADDR)
			ei.blk = NULL_ADDR;
		else
			ei.blk = dn->data_blkaddr;
	} else if (type == EX_BLOCK_AGE) {
		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
			return;
	}
	__update_extent_tree_range(dn->inode, &ei, type);
}

static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
					enum extent_type type)
{
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et, *next;
	struct extent_node *en;
	unsigned int node_cnt = 0, tree_cnt = 0;
	int remained;

	if (!atomic_read(&eti->total_zombie_tree))
		goto free_node;

	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	/* 1. remove unreferenced extent trees */
	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
		if (atomic_read(&et->node_cnt)) {
			write_lock(&et->lock);
			node_cnt += __free_extent_tree(sbi, et);
			write_unlock(&et->lock);
		}
		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
		list_del_init(&et->list);
		radix_tree_delete(&eti->extent_tree_root, et->ino);
		kmem_cache_free(extent_tree_slab, et);
		atomic_dec(&eti->total_ext_tree);
		atomic_dec(&eti->total_zombie_tree);
		tree_cnt++;

		if (node_cnt + tree_cnt >= nr_shrink)
			goto unlock_out;
		cond_resched();
	}
	mutex_unlock(&eti->extent_tree_lock);

free_node:
	/* 2. remove LRU extent entries */
	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	remained = nr_shrink - (node_cnt + tree_cnt);

	spin_lock(&eti->extent_lock);
	for (; remained > 0; remained--) {
		if (list_empty(&eti->extent_list))
			break;
		en = list_first_entry(&eti->extent_list,
					struct extent_node, list);
		et = en->et;
		if (!write_trylock(&et->lock)) {
			/* refresh this extent node's position in extent list */
			list_move_tail(&en->list, &eti->extent_list);
			continue;
		}

		list_del_init(&en->list);
		spin_unlock(&eti->extent_lock);

		__detach_extent_node(sbi, et, en);

		write_unlock(&et->lock);
		node_cnt++;
		spin_lock(&eti->extent_lock);
	}
	spin_unlock(&eti->extent_lock);

unlock_out:
	mutex_unlock(&eti->extent_tree_lock);
out:
	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);

	return node_cnt + tree_cnt;
}

/* read extent cache operations */
bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_READ))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
}

bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
				block_t *blkaddr)
{
	struct extent_info ei = {};

	if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
		return false;
	*blkaddr = ei.blk + index - ei.fofs;
	return true;
}
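
/*
 * Example (hypothetical extent): with ei = {fofs=100, len=8, blk=5000}
 * cached, a lookup of index 103 falls inside [100, 108) and resolves
 * to block address 5000 + 103 - 100 = 5003 without touching the node
 * page.
 */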

void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_READ);
}

void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, block_t blkaddr, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
		.blk = blkaddr,
	};

	if (!__may_extent_tree(dn->inode, EX_READ))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_READ);
}

unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, READ_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
}

/* block age extent cache operations */
bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
	};

	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
}

unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, AGE_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
}

static unsigned int __destroy_extent_node(struct inode *inode,
					enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et || !atomic_read(&et->node_cnt))
		return 0;

	write_lock(&et->lock);
	node_cnt = __free_extent_tree(sbi, et);
	write_unlock(&et->lock);

	return node_cnt;
}

void f2fs_destroy_extent_node(struct inode *inode)
{
	__destroy_extent_node(inode, EX_READ);
	__destroy_extent_node(inode, EX_BLOCK_AGE);
}

static void __drop_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	bool updated = false;

	if (!__may_extent_tree(inode, type))
		return;

	write_lock(&et->lock);
	__free_extent_tree(sbi, et);
	if (type == EX_READ) {
		set_inode_flag(inode, FI_NO_EXTENT);
		if (et->largest.len) {
			et->largest.len = 0;
			updated = true;
		}
	}
	write_unlock(&et->lock);
	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}

void f2fs_drop_extent_tree(struct inode *inode)
{
	__drop_extent_tree(inode, EX_READ);
	__drop_extent_tree(inode, EX_BLOCK_AGE);
}

static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et)
		return;

	if (inode->i_nlink && !is_bad_inode(inode) &&
					atomic_read(&et->node_cnt)) {
		mutex_lock(&eti->extent_tree_lock);
		list_add_tail(&et->list, &eti->zombie_list);
		atomic_inc(&eti->total_zombie_tree);
		mutex_unlock(&eti->extent_tree_lock);
		return;
	}

	/* free all extent info belonging to this extent tree */
	node_cnt = __destroy_extent_node(inode, type);

	/* delete extent tree entry in radix tree */
	mutex_lock(&eti->extent_tree_lock);
	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
	kmem_cache_free(extent_tree_slab, et);
	atomic_dec(&eti->total_ext_tree);
	mutex_unlock(&eti->extent_tree_lock);

	F2FS_I(inode)->extent_tree[type] = NULL;

	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
}

void f2fs_destroy_extent_tree(struct inode *inode)
{
	__destroy_extent_tree(inode, EX_READ);
	__destroy_extent_tree(inode, EX_BLOCK_AGE);
}

static void __init_extent_tree_info(struct extent_tree_info *eti)
{
	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
	mutex_init(&eti->extent_tree_lock);
	INIT_LIST_HEAD(&eti->extent_list);
	spin_lock_init(&eti->extent_lock);
	atomic_set(&eti->total_ext_tree, 0);
	INIT_LIST_HEAD(&eti->zombie_list);
	atomic_set(&eti->total_zombie_tree, 0);
	atomic_set(&eti->total_ext_node, 0);
}

void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
{
	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);

	/* initialize for block age extents */
	atomic64_set(&sbi->allocated_data_blocks, 0);
	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
	sbi->last_age_weight = LAST_AGE_WEIGHT;
}

int __init f2fs_create_extent_cache(void)
{
	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
			sizeof(struct extent_tree));
	if (!extent_tree_slab)
		return -ENOMEM;
	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
			sizeof(struct extent_node));
	if (!extent_node_slab) {
		kmem_cache_destroy(extent_tree_slab);
		return -ENOMEM;
	}
	return 0;
}

void f2fs_destroy_extent_cache(void)
{
	kmem_cache_destroy(extent_node_slab);
	kmem_cache_destroy(extent_tree_slab);
}