// SPDX-License-Identifier: GPL-2.0
/*
 * f2fs extent cache support
 *
 * Copyright (c) 2015 Motorola Mobility
 * Copyright (c) 2015 Samsung Electronics
 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
 *          Chao Yu <chao2.yu@samsung.com>
 *
 * block_age-based extent cache added by:
 * Copyright (c) 2022 xiaomi Co., Ltd.
 *             http://www.xiaomi.com/
 */

#include <linux/fs.h>
#include <linux/f2fs_fs.h>

#include "f2fs.h"
#include "node.h"
#include <trace/events/f2fs.h>

bool sanity_check_extent_cache(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct extent_tree *et = fi->extent_tree[EX_READ];
	struct extent_info *ei;

	if (!et)
		return true;

	ei = &et->largest;
	if (!ei->len)
		return true;

	/* Let's drop the largest extent, if the checkpoint got corrupted. */
	if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) {
		ei->len = 0;
		et->largest_updated = true;
		return true;
	}

	if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) ||
	    !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
					DATA_GENERIC_ENHANCE)) {
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
			  __func__, inode->i_ino,
			  ei->blk, ei->fofs, ei->len);
		return false;
	}
	return true;
}

static void __set_extent_info(struct extent_info *ei,
				unsigned int fofs, unsigned int len,
				block_t blk, bool keep_clen,
				unsigned long age, unsigned long last_blocks,
				enum extent_type type)
{
	ei->fofs = fofs;
	ei->len = len;

	if (type == EX_READ) {
		ei->blk = blk;
		if (keep_clen)
			return;
#ifdef CONFIG_F2FS_FS_COMPRESSION
		ei->c_len = 0;
#endif
	} else if (type == EX_BLOCK_AGE) {
		ei->age = age;
		ei->last_blocks = last_blocks;
	}
}
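
/*
 * Note (illustrative, not in the original source): a read extent_info
 * describes a run of physically contiguous blocks, so file block
 * (fofs + i) maps to disk block (blk + i) for 0 <= i < len.  E.g.
 * {fofs = 8, len = 4, blk = 1000} says file blocks 8..11 live at disk
 * blocks 1000..1003, and a lookup of index 10 resolves to
 * 1000 + (10 - 8) = 1002, exactly as computed in
 * f2fs_lookup_read_extent_cache_block() below.
 */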

static bool __may_read_extent_tree(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (!test_opt(sbi, READ_EXTENT_CACHE))
		return false;
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return false;
	if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
			!f2fs_sb_has_readonly(sbi))
		return false;
	return S_ISREG(inode->i_mode);
}

static bool __may_age_extent_tree(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (!test_opt(sbi, AGE_EXTENT_CACHE))
		return false;
	if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
		return false;
	if (file_is_cold(inode))
		return false;

	return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode);
}

static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
{
	if (type == EX_READ)
		return __may_read_extent_tree(inode);
	else if (type == EX_BLOCK_AGE)
		return __may_age_extent_tree(inode);
	return false;
}

static bool __may_extent_tree(struct inode *inode, enum extent_type type)
{
	/*
	 * For files recovered during mount, do not create extents
	 * if the shrinker is not registered yet.
	 */
	if (list_empty(&F2FS_I_SB(inode)->s_list))
		return false;

	return __init_may_extent_tree(inode, type);
}

static void __try_update_largest_extent(struct extent_tree *et,
						struct extent_node *en)
{
	if (et->type != EX_READ)
		return;
	if (en->ei.len <= et->largest.len)
		return;

	et->largest = en->ei;
	et->largest_updated = true;
}

static bool __is_extent_mergeable(struct extent_info *back,
		struct extent_info *front, enum extent_type type)
{
	if (type == EX_READ) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (back->c_len && back->len != back->c_len)
			return false;
		if (front->c_len && front->len != front->c_len)
			return false;
#endif
		return (back->fofs + back->len == front->fofs &&
				back->blk + back->len == front->blk);
	} else if (type == EX_BLOCK_AGE) {
		return (back->fofs + back->len == front->fofs &&
			abs(back->age - front->age) <= SAME_AGE_REGION &&
			abs(back->last_blocks - front->last_blocks) <=
							SAME_AGE_REGION);
	}
	return false;
}

static bool __is_back_mergeable(struct extent_info *cur,
		struct extent_info *back, enum extent_type type)
{
	return __is_extent_mergeable(back, cur, type);
}

static bool __is_front_mergeable(struct extent_info *cur,
		struct extent_info *front, enum extent_type type)
{
	return __is_extent_mergeable(cur, front, type);
}
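
/*
 * Merge example (illustrative): for EX_READ, back {fofs = 0, len = 4,
 * blk = 100} and front {fofs = 4, len = 2, blk = 104} are mergeable
 * because both the file offsets and the disk blocks are contiguous;
 * the merged extent covers {fofs = 0, len = 6, blk = 100}.  For
 * EX_BLOCK_AGE, contiguity in fofs is still required, but instead of
 * disk adjacency the two extents must lie within SAME_AGE_REGION of
 * each other in both age and last_blocks.
 */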

static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
			struct extent_node *cached_en, unsigned int fofs)
{
	struct rb_node *node = root->rb_root.rb_node;
	struct extent_node *en;

	/* check a cached entry */
	if (cached_en && cached_en->ei.fofs <= fofs &&
			cached_en->ei.fofs + cached_en->ei.len > fofs)
		return cached_en;

	/* check rb_tree */
	while (node) {
		en = rb_entry(node, struct extent_node, rb_node);

		if (fofs < en->ei.fofs)
			node = node->rb_left;
		else if (fofs >= en->ei.fofs + en->ei.len)
			node = node->rb_right;
		else
			return en;
	}
	return NULL;
}

/*
 * Look up the rb entry covering @fofs in the rb-tree; on a hit, return
 * the entry, otherwise return NULL.
 * @prev_entry: extent before fofs
 * @next_entry: extent after fofs
 * @insert_p: insert point for a new extent at fofs,
 *            in order to simplify the insertion afterwards.
 * The tree must stay unchanged between lookup and insertion.
 */
static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
				struct extent_node *cached_en,
				unsigned int fofs,
				struct extent_node **prev_entry,
				struct extent_node **next_entry,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent,
				bool *leftmost)
{
	struct rb_node **pnode = &root->rb_root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct extent_node *en = cached_en;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_entry = NULL;
	*next_entry = NULL;

	if (RB_EMPTY_ROOT(&root->rb_root))
		return NULL;

	if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
		goto lookup_neighbors;

	*leftmost = true;

	while (*pnode) {
		parent = *pnode;
		en = rb_entry(*pnode, struct extent_node, rb_node);

		if (fofs < en->ei.fofs) {
			pnode = &(*pnode)->rb_left;
		} else if (fofs >= en->ei.fofs + en->ei.len) {
			pnode = &(*pnode)->rb_right;
			*leftmost = false;
		} else {
			goto lookup_neighbors;
		}
	}

	*insert_p = pnode;
	*insert_parent = parent;

	en = rb_entry(parent, struct extent_node, rb_node);
	tmp_node = parent;
	if (parent && fofs > en->ei.fofs)
		tmp_node = rb_next(parent);
	*next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);

	tmp_node = parent;
	if (parent && fofs < en->ei.fofs)
		tmp_node = rb_prev(parent);
	*prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
	return NULL;

lookup_neighbors:
	if (fofs == en->ei.fofs) {
		/* lookup prev node for merging backward later */
		tmp_node = rb_prev(&en->rb_node);
		*prev_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	if (fofs == en->ei.fofs + en->ei.len - 1) {
		/* lookup next node for merging frontward later */
		tmp_node = rb_next(&en->rb_node);
		*next_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	return en;
}

static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;

static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en;

	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
	if (!en)
		return NULL;

	en->ei = *ei;
	INIT_LIST_HEAD(&en->list);
	en->et = et;

	rb_link_node(&en->rb_node, parent, p);
	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
	atomic_inc(&et->node_cnt);
	atomic_inc(&eti->total_ext_node);
	return en;
}

static void __detach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	rb_erase_cached(&en->rb_node, &et->root);
	atomic_dec(&et->node_cnt);
	atomic_dec(&eti->total_ext_node);

	if (et->cached_en == en)
		et->cached_en = NULL;
	kmem_cache_free(extent_node_slab, en);
}

/*
 * Flow to release an extent_node:
 * 1. list_del_init
 * 2. __detach_extent_node
 * 3. kmem_cache_free.
 */
static void __release_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	spin_lock(&eti->extent_lock);
	f2fs_bug_on(sbi, list_empty(&en->list));
	list_del_init(&en->list);
	spin_unlock(&eti->extent_lock);

	__detach_extent_node(sbi, et, en);
}
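
/*
 * Lifetime note (illustrative): every attached extent_node is also
 * linked on the per-type global eti->extent_list.  Lookup hits move a
 * node to the tail of that list, so the list stays in LRU order and
 * the shrinker below can reclaim from the head, dropping the
 * least-recently-used extents first.
 */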

static struct extent_tree *__grab_extent_tree(struct inode *inode,
						enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et;
	nid_t ino = inode->i_ino;

	mutex_lock(&eti->extent_tree_lock);
	et = radix_tree_lookup(&eti->extent_tree_root, ino);
	if (!et) {
		et = f2fs_kmem_cache_alloc(extent_tree_slab,
					GFP_NOFS, true, NULL);
		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
		memset(et, 0, sizeof(struct extent_tree));
		et->ino = ino;
		et->type = type;
		et->root = RB_ROOT_CACHED;
		et->cached_en = NULL;
		rwlock_init(&et->lock);
		INIT_LIST_HEAD(&et->list);
		atomic_set(&et->node_cnt, 0);
		atomic_inc(&eti->total_ext_tree);
	} else {
		atomic_dec(&eti->total_zombie_tree);
		list_del_init(&et->list);
	}
	mutex_unlock(&eti->extent_tree_lock);

	/* never dies until evict_inode */
	F2FS_I(inode)->extent_tree[type] = et;

	return et;
}

static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
					struct extent_tree *et)
{
	struct rb_node *node, *next;
	struct extent_node *en;
	unsigned int count = atomic_read(&et->node_cnt);

	node = rb_first_cached(&et->root);
	while (node) {
		next = rb_next(node);
		en = rb_entry(node, struct extent_node, rb_node);
		__release_extent_node(sbi, et, en);
		node = next;
	}

	return count - atomic_read(&et->node_cnt);
}

static void __drop_largest_extent(struct extent_tree *et,
					pgoff_t fofs, unsigned int len)
{
	if (fofs < et->largest.fofs + et->largest.len &&
			fofs + len > et->largest.fofs) {
		et->largest.len = 0;
		et->largest_updated = true;
	}
}

void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
	struct extent_tree *et;
	struct extent_node *en;
	struct extent_info ei;

	if (!__may_extent_tree(inode, EX_READ)) {
		/* drop largest read extent */
		if (i_ext && i_ext->len) {
			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
			i_ext->len = 0;
			set_page_dirty(ipage);
		}
		goto out;
	}

	et = __grab_extent_tree(inode, EX_READ);

	if (!i_ext || !i_ext->len)
		goto out;

	get_read_extent_info(&ei, i_ext);

	write_lock(&et->lock);
	if (atomic_read(&et->node_cnt))
		goto unlock_out;

	en = __attach_extent_node(sbi, et, &ei, NULL,
				&et->root.rb_root.rb_node, true);
	if (en) {
		et->largest = en->ei;
		et->cached_en = en;

		spin_lock(&eti->extent_lock);
		list_add_tail(&en->list, &eti->extent_list);
		spin_unlock(&eti->extent_lock);
	}
unlock_out:
	write_unlock(&et->lock);
out:
	if (!F2FS_I(inode)->extent_tree[EX_READ])
		set_inode_flag(inode, FI_NO_EXTENT);
}

void f2fs_init_age_extent_tree(struct inode *inode)
{
	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
		return;
	__grab_extent_tree(inode, EX_BLOCK_AGE);
}

void f2fs_init_extent_tree(struct inode *inode)
{
	/* initialize read cache */
	if (__init_may_extent_tree(inode, EX_READ))
		__grab_extent_tree(inode, EX_READ);

	/* initialize block age cache */
	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
		__grab_extent_tree(inode, EX_BLOCK_AGE);
}
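
/*
 * Init note (illustrative): only the read tree is seeded from disk;
 * the on-disk inode carries a single i_ext extent that becomes the
 * tree's first node and its initial "largest" extent.  The block-age
 * tree, by contrast, appears to have no on-disk form here, so it
 * starts empty and is rebuilt from write activity after each mount.
 */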

static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
			struct extent_info *ei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en;
	bool ret = false;

	if (!et)
		return false;

	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);

	read_lock(&et->lock);

	if (type == EX_READ &&
			et->largest.fofs <= pgofs &&
			et->largest.fofs + et->largest.len > pgofs) {
		*ei = et->largest;
		ret = true;
		stat_inc_largest_node_hit(sbi);
		goto out;
	}

	en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
	if (!en)
		goto out;

	if (en == et->cached_en)
		stat_inc_cached_node_hit(sbi, type);
	else
		stat_inc_rbtree_node_hit(sbi, type);

	*ei = en->ei;
	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	ret = true;
out:
	stat_inc_total_hit(sbi, type);
	read_unlock(&et->lock);

	if (type == EX_READ)
		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
	return ret;
}

static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct extent_node *prev_ex,
				struct extent_node *next_ex)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en = NULL;

	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
		prev_ex->ei.len += ei->len;
		ei = &prev_ex->ei;
		en = prev_ex;
	}

	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
		next_ex->ei.fofs = ei->fofs;
		next_ex->ei.len += ei->len;
		if (et->type == EX_READ)
			next_ex->ei.blk = ei->blk;
		if (en)
			__release_extent_node(sbi, et, prev_ex);

		en = next_ex;
	}

	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	return en;
}

static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node **insert_p,
				struct rb_node *insert_parent,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct rb_node **p = &et->root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_node *en = NULL;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	leftmost = true;

	/* look up extent_node in the rb tree */
	while (*p) {
		parent = *p;
		en = rb_entry(parent, struct extent_node, rb_node);

		if (ei->fofs < en->ei.fofs) {
			p = &(*p)->rb_left;
		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			f2fs_bug_on(sbi, 1);
		}
	}

do_insert:
	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	/* update in global extent list */
	spin_lock(&eti->extent_lock);
	list_add_tail(&en->list, &eti->extent_list);
	et->cached_en = en;
	spin_unlock(&eti->extent_lock);
	return en;
}
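
/*
 * Lookup note (illustrative): a read lookup tries three sources in
 * order of decreasing cheapness before giving up: the tree's cached
 * "largest" extent, the cached_en shortcut, and finally a full rb-tree
 * walk.  The per-type hit counters above (largest/cached/rbtree)
 * expose exactly this hierarchy in the f2fs statistics.
 */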

static void __update_extent_tree_range(struct inode *inode,
			struct extent_info *tei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en = NULL, *en1 = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei, dei, prev;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int fofs = tei->fofs, len = tei->len;
	unsigned int end = fofs + len;
	bool updated = false;
	bool leftmost = false;

	if (!et)
		return;

	if (type == EX_READ)
		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
						tei->blk, 0);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
						tei->age, tei->last_blocks);

	write_lock(&et->lock);

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
			write_unlock(&et->lock);
			return;
		}

		prev = et->largest;
		dei.len = 0;

		/*
		 * drop largest extent before lookup, in case it has already
		 * been shrunk from the extent tree
		 */
		__drop_largest_extent(et, fofs, len);
	}

	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (!en)
		en = next_en;

	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
	while (en && en->ei.fofs < end) {
		unsigned int org_end;
		int parts = 0;	/* # of parts current extent split into */

		next_en = en1 = NULL;

		dei = en->ei;
		org_end = dei.fofs + dei.len;
		f2fs_bug_on(sbi, fofs >= org_end);

		if (fofs > dei.fofs && (type != EX_READ ||
				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
			en->ei.len = fofs - en->ei.fofs;
			prev_en = en;
			parts = 1;
		}

		if (end < org_end && (type != EX_READ ||
				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
			if (parts) {
				__set_extent_info(&ei,
					end, org_end - end,
					end - dei.fofs + dei.blk, false,
					dei.age, dei.last_blocks,
					type);
				en1 = __insert_extent_tree(sbi, et, &ei,
							NULL, NULL, true);
				next_en = en1;
			} else {
				__set_extent_info(&en->ei,
					end, en->ei.len - (end - dei.fofs),
					en->ei.blk + (end - dei.fofs), true,
					dei.age, dei.last_blocks,
					type);
				next_en = en;
			}
			parts++;
		}

		if (!next_en) {
			struct rb_node *node = rb_next(&en->rb_node);

			next_en = rb_entry_safe(node, struct extent_node,
						rb_node);
		}

		if (parts)
			__try_update_largest_extent(et, en);
		else
			__release_extent_node(sbi, et, en);

		/*
		 * If the original extent was split into zero or two parts,
		 * the tree has been altered by deletion or insertion, so
		 * invalidate the cached insert-position pointers into the
		 * tree.
		 */
		if (parts != 1) {
			insert_p = NULL;
			insert_parent = NULL;
		}
		en = next_en;
	}

	if (type == EX_BLOCK_AGE)
		goto update_age_extent_cache;

	/* 3. update extent in read extent cache */
	BUG_ON(type != EX_READ);

	if (tei->blk) {
		__set_extent_info(&ei, fofs, len, tei->blk, false,
				0, 0, EX_READ);
		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
			__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);

		/* give up the extent cache if splits and small updates happen */
		if (dei.len >= 1 &&
				prev.len < F2FS_MIN_EXTENT_LEN &&
				et->largest.len < F2FS_MIN_EXTENT_LEN) {
			et->largest.len = 0;
			et->largest_updated = true;
			set_inode_flag(inode, FI_NO_EXTENT);
		}
	}

	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		__free_extent_tree(sbi, et);

	if (et->largest_updated) {
		et->largest_updated = false;
		updated = true;
	}
	goto out_read_extent_cache;
update_age_extent_cache:
	if (!tei->last_blocks)
		goto out_read_extent_cache;

	__set_extent_info(&ei, fofs, len, 0, false,
			tei->age, tei->last_blocks, EX_BLOCK_AGE);
	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
				insert_p, insert_parent, leftmost);
out_read_extent_cache:
	write_unlock(&et->lock);

	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}
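
/*
 * Split example (illustrative, assuming F2FS_MIN_EXTENT_LEN is 64):
 * suppose the tree holds a read extent {fofs = 0, len = 192, blk = 500}
 * and a write invalidates file blocks [64, 128).  Step 2 above trims
 * the node to {fofs = 0, len = 64, blk = 500} and inserts a second node
 * {fofs = 128, len = 64, blk = 628}; for EX_READ, either part survives
 * only if it is at least F2FS_MIN_EXTENT_LEN blocks long.  Step 3 then
 * records the new mapping of [64, 128), merging it with its neighbors
 * when the new blocks happen to be physically contiguous.
 */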

#ifdef CONFIG_F2FS_FS_COMPRESSION
void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
				pgoff_t fofs, block_t blkaddr,
				unsigned int llen, unsigned int c_len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
	struct extent_node *en = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	bool leftmost = false;

	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
						blkaddr, c_len);

	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return;

	write_lock(&et->lock);

	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (en)
		goto unlock_out;

	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
	ei.c_len = c_len;

	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
				insert_p, insert_parent, leftmost);
unlock_out:
	write_unlock(&et->lock);
}
#endif

static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
						unsigned long long new,
						unsigned long long old)
{
	unsigned int rem_old, rem_new;
	unsigned long long res;
	unsigned int weight = sbi->last_age_weight;

	res = div_u64_rem(new, 100, &rem_new) * (100 - weight) +
		div_u64_rem(old, 100, &rem_old) * weight;

	if (rem_new)
		res += rem_new * (100 - weight) / 100;
	if (rem_old)
		res += rem_old * weight / 100;

	return res;
}
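
/*
 * Worked example (illustrative): __calculate_block_age() computes the
 * weighted average
 *
 *	res = new * (100 - weight) / 100 + old * weight / 100
 *
 * splitting each operand into quotient and remainder via div_u64_rem()
 * so the intermediate products cannot overflow 64 bits.  E.g. with
 * weight = 30, new = 1000 and old = 400, the result is
 * 1000 * 70/100 + 400 * 30/100 = 700 + 120 = 820.
 */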

/* This returns a new age and allocated blocks in ei */
static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
						block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t f_size = i_size_read(inode);
	unsigned long long cur_blocks =
				atomic64_read(&sbi->allocated_data_blocks);
	struct extent_info tei = *ei;	/* only fofs and len are valid */

	/*
	 * When I/O is not aligned to PAGE_SIZE, an update hits the last
	 * file block even during sequential writes, so don't record an
	 * age for the newly appended last file block here.
	 */
	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
			blkaddr == NEW_ADDR)
		return -EINVAL;

	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
		unsigned long long cur_age;

		if (cur_blocks >= tei.last_blocks)
			cur_age = cur_blocks - tei.last_blocks;
		else
			/* allocated_data_blocks overflow */
			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;

		if (tei.age)
			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
		else
			ei->age = cur_age;
		ei->last_blocks = cur_blocks;
		WARN_ON(ei->age > cur_blocks);
		return 0;
	}

	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);

	/* the data block was allocated for the first time */
	if (blkaddr == NEW_ADDR)
		goto out;

	if (__is_valid_data_blkaddr(blkaddr) &&
	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
		f2fs_bug_on(sbi, 1);
		return -EINVAL;
	}
out:
	/*
	 * Init block age with zero; this can happen when the block-age
	 * extent was reclaimed due to memory constraints or a system
	 * reboot.
	 */
	ei->age = 0;
	ei->last_blocks = cur_blocks;
	return 0;
}

static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
{
	struct extent_info ei = {};

	if (!__may_extent_tree(dn->inode, type))
		return;

	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
								dn->ofs_in_node;
	ei.len = 1;

	if (type == EX_READ) {
		if (dn->data_blkaddr == NEW_ADDR)
			ei.blk = NULL_ADDR;
		else
			ei.blk = dn->data_blkaddr;
	} else if (type == EX_BLOCK_AGE) {
		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
			return;
	}
	__update_extent_tree_range(dn->inode, &ei, type);
}
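
/*
 * Age semantics (illustrative): "age" is measured in data blocks
 * allocated file-system wide (sbi->allocated_data_blocks), not in wall
 * time.  On each rewrite of a block, the distance since the previous
 * update (cur_blocks - tei.last_blocks) is folded into the stored age
 * by __calculate_block_age(), so frequently rewritten regions converge
 * to a small age and can be treated as hot data by the allocator.
 */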

static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
				enum extent_type type)
{
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et, *next;
	struct extent_node *en;
	unsigned int node_cnt = 0, tree_cnt = 0;
	int remained;

	if (!atomic_read(&eti->total_zombie_tree))
		goto free_node;

	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	/* 1. remove unreferenced extent trees */
	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
		if (atomic_read(&et->node_cnt)) {
			write_lock(&et->lock);
			node_cnt += __free_extent_tree(sbi, et);
			write_unlock(&et->lock);
		}
		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
		list_del_init(&et->list);
		radix_tree_delete(&eti->extent_tree_root, et->ino);
		kmem_cache_free(extent_tree_slab, et);
		atomic_dec(&eti->total_ext_tree);
		atomic_dec(&eti->total_zombie_tree);
		tree_cnt++;

		if (node_cnt + tree_cnt >= nr_shrink)
			goto unlock_out;
		cond_resched();
	}
	mutex_unlock(&eti->extent_tree_lock);

free_node:
	/* 2. remove LRU extent entries */
	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	remained = nr_shrink - (node_cnt + tree_cnt);

	spin_lock(&eti->extent_lock);
	for (; remained > 0; remained--) {
		if (list_empty(&eti->extent_list))
			break;
		en = list_first_entry(&eti->extent_list,
					struct extent_node, list);
		et = en->et;
		if (!write_trylock(&et->lock)) {
			/* refresh this extent node's position in extent list */
			list_move_tail(&en->list, &eti->extent_list);
			continue;
		}

		list_del_init(&en->list);
		spin_unlock(&eti->extent_lock);

		__detach_extent_node(sbi, et, en);

		write_unlock(&et->lock);
		node_cnt++;
		spin_lock(&eti->extent_lock);
	}
	spin_unlock(&eti->extent_lock);

unlock_out:
	mutex_unlock(&eti->extent_tree_lock);
out:
	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);

	return node_cnt + tree_cnt;
}

/* read extent cache operations */
bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_READ))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
}

bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
				block_t *blkaddr)
{
	struct extent_info ei = {};

	if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
		return false;
	*blkaddr = ei.blk + index - ei.fofs;
	return true;
}
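
/*
 * Usage sketch (illustrative): callers can resolve an index to a disk
 * address without touching node pages when the cache hits, roughly:
 *
 *	block_t blkaddr;
 *
 *	if (f2fs_lookup_read_extent_cache_block(inode, index, &blkaddr))
 *		return blkaddr;		// cache hit
 *	// otherwise fall back to the dnode lookup path
 */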

void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_READ);
}

void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, block_t blkaddr, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
		.blk = blkaddr,
	};

	if (!__may_extent_tree(dn->inode, EX_READ))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_READ);
}

unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, READ_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
}

/* block age extent cache operations */
bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
	};

	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
}

unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, AGE_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
}

static unsigned int __destroy_extent_node(struct inode *inode,
					enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et || !atomic_read(&et->node_cnt))
		return 0;

	write_lock(&et->lock);
	node_cnt = __free_extent_tree(sbi, et);
	write_unlock(&et->lock);

	return node_cnt;
}

void f2fs_destroy_extent_node(struct inode *inode)
{
	__destroy_extent_node(inode, EX_READ);
	__destroy_extent_node(inode, EX_BLOCK_AGE);
}

static void __drop_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	bool updated = false;

	if (!__may_extent_tree(inode, type))
		return;

	write_lock(&et->lock);
	__free_extent_tree(sbi, et);
	if (type == EX_READ) {
		set_inode_flag(inode, FI_NO_EXTENT);
		if (et->largest.len) {
			et->largest.len = 0;
			updated = true;
		}
	}
	write_unlock(&et->lock);
	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}

void f2fs_drop_extent_tree(struct inode *inode)
{
	__drop_extent_tree(inode, EX_READ);
	__drop_extent_tree(inode, EX_BLOCK_AGE);
}

static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et)
		return;

	if (inode->i_nlink && !is_bad_inode(inode) &&
					atomic_read(&et->node_cnt)) {
		mutex_lock(&eti->extent_tree_lock);
		list_add_tail(&et->list, &eti->zombie_list);
		atomic_inc(&eti->total_zombie_tree);
		mutex_unlock(&eti->extent_tree_lock);
		return;
	}

	/* free all extent info belonging to this extent tree */
	node_cnt = __destroy_extent_node(inode, type);

	/* delete extent tree entry in radix tree */
	mutex_lock(&eti->extent_tree_lock);
	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
	kmem_cache_free(extent_tree_slab, et);
	atomic_dec(&eti->total_ext_tree);
	mutex_unlock(&eti->extent_tree_lock);

	F2FS_I(inode)->extent_tree[type] = NULL;

	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
}

void f2fs_destroy_extent_tree(struct inode *inode)
{
	__destroy_extent_tree(inode, EX_READ);
	__destroy_extent_tree(inode, EX_BLOCK_AGE);
}
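
/*
 * Zombie note (illustrative): when a still-linked inode is evicted
 * while its tree has nodes, __destroy_extent_tree() parks the tree on
 * eti->zombie_list instead of freeing it.  A later __grab_extent_tree()
 * for the same inode revives it via the radix tree; otherwise the
 * shrinker's phase 1 reclaims zombies wholesale before phase 2 trims
 * the LRU list node by node.
 */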

static void __init_extent_tree_info(struct extent_tree_info *eti)
{
	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
	mutex_init(&eti->extent_tree_lock);
	INIT_LIST_HEAD(&eti->extent_list);
	spin_lock_init(&eti->extent_lock);
	atomic_set(&eti->total_ext_tree, 0);
	INIT_LIST_HEAD(&eti->zombie_list);
	atomic_set(&eti->total_zombie_tree, 0);
	atomic_set(&eti->total_ext_node, 0);
}

void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
{
	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);

	/* initialize for block age extents */
	atomic64_set(&sbi->allocated_data_blocks, 0);
	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
	sbi->last_age_weight = LAST_AGE_WEIGHT;
}

int __init f2fs_create_extent_cache(void)
{
	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
			sizeof(struct extent_tree));
	if (!extent_tree_slab)
		return -ENOMEM;
	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
			sizeof(struct extent_node));
	if (!extent_node_slab) {
		kmem_cache_destroy(extent_tree_slab);
		return -ENOMEM;
	}
	return 0;
}

void f2fs_destroy_extent_cache(void)
{
	kmem_cache_destroy(extent_node_slab);
	kmem_cache_destroy(extent_tree_slab);
}