// SPDX-License-Identifier: GPL-2.0
/*
 * f2fs extent cache support
 *
 * Copyright (c) 2015 Motorola Mobility
 * Copyright (c) 2015 Samsung Electronics
 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
 *          Chao Yu <chao2.yu@samsung.com>
 *
 * block_age-based extent cache added by:
 * Copyright (c) 2022 xiaomi Co., Ltd.
 * http://www.xiaomi.com/
 */

#include <linux/fs.h>
#include <linux/f2fs_fs.h>

#include "f2fs.h"
#include "node.h"
#include <trace/events/f2fs.h>

bool sanity_check_extent_cache(struct inode *inode, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
	struct extent_info ei;

	get_read_extent_info(&ei, i_ext);

	if (!ei.len)
		return true;

	if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) ||
	    !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1,
					DATA_GENERIC_ENHANCE)) {
		f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
			  __func__, inode->i_ino,
			  ei.blk, ei.fofs, ei.len);
		return false;
	}
	return true;
}

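/*
 * Fill @ei for the given extent type: read extents carry a start block
 * address (and, with compression enabled, a compressed length), while
 * block-age extents carry an age and the allocation counter sampled at
 * update time.
 */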
static void __set_extent_info(struct extent_info *ei,
				unsigned int fofs, unsigned int len,
				block_t blk, bool keep_clen,
				unsigned long age, unsigned long last_blocks,
				enum extent_type type)
{
	ei->fofs = fofs;
	ei->len = len;

	if (type == EX_READ) {
		ei->blk = blk;
		if (keep_clen)
			return;
#ifdef CONFIG_F2FS_FS_COMPRESSION
		ei->c_len = 0;
#endif
	} else if (type == EX_BLOCK_AGE) {
		ei->age = age;
		ei->last_blocks = last_blocks;
	}
}

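/*
 * Tell whether an extent tree of @type may be initialized for @inode,
 * based only on the mount options and the inode mode.
 */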
static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
{
	if (type == EX_READ)
		return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
			S_ISREG(inode->i_mode);
	if (type == EX_BLOCK_AGE)
		return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
			(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
	return false;
}

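/*
 * Like __init_may_extent_tree(), but also honours per-inode runtime state
 * such as FI_NO_EXTENT and compression, which can veto caching.
 */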
static bool __may_extent_tree(struct inode *inode, enum extent_type type)
{
	/*
	 * Don't create extents for files recovered during mount
	 * if the shrinker is not registered yet.
	 */
	if (list_empty(&F2FS_I_SB(inode)->s_list))
		return false;

	if (!__init_may_extent_tree(inode, type))
		return false;

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT))
			return false;
		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
		    !f2fs_sb_has_readonly(F2FS_I_SB(inode)))
			return false;
	} else if (type == EX_BLOCK_AGE) {
		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
			return false;
		if (file_is_cold(inode))
			return false;
	}
	return true;
}

static void __try_update_largest_extent(struct extent_tree *et,
						struct extent_node *en)
{
	if (et->type != EX_READ)
		return;
	if (en->ei.len <= et->largest.len)
		return;

	et->largest = en->ei;
	et->largest_updated = true;
}

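/*
 * Two extents are mergeable when @back ends exactly where @front begins.
 * Read extents must also be block-contiguous, and a compressed cluster
 * (nonzero c_len differing from len) never merges; block-age extents must
 * have ages and allocation counters within SAME_AGE_REGION of each other.
 */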
static bool __is_extent_mergeable(struct extent_info *back,
		struct extent_info *front, enum extent_type type)
{
	if (type == EX_READ) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (back->c_len && back->len != back->c_len)
			return false;
		if (front->c_len && front->len != front->c_len)
			return false;
#endif
		return (back->fofs + back->len == front->fofs &&
			back->blk + back->len == front->blk);
	} else if (type == EX_BLOCK_AGE) {
		return (back->fofs + back->len == front->fofs &&
			abs(back->age - front->age) <= SAME_AGE_REGION &&
			abs(back->last_blocks - front->last_blocks) <=
							SAME_AGE_REGION);
	}
	return false;
}

static bool __is_back_mergeable(struct extent_info *cur,
		struct extent_info *back, enum extent_type type)
{
	return __is_extent_mergeable(back, cur, type);
}

static bool __is_front_mergeable(struct extent_info *cur,
		struct extent_info *front, enum extent_type type)
{
	return __is_extent_mergeable(cur, front, type);
}

static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
			struct extent_node *cached_en, unsigned int fofs)
{
	struct rb_node *node = root->rb_root.rb_node;
	struct extent_node *en;

	/* check a cached entry */
	if (cached_en && cached_en->ei.fofs <= fofs &&
			cached_en->ei.fofs + cached_en->ei.len > fofs)
		return cached_en;

	/* check rb_tree */
	while (node) {
		en = rb_entry(node, struct extent_node, rb_node);

		if (fofs < en->ei.fofs)
			node = node->rb_left;
		else if (fofs >= en->ei.fofs + en->ei.len)
			node = node->rb_right;
		else
			return en;
	}
	return NULL;
}

/*
 * Look up the rb entry covering @fofs in the rb-tree; if found, return the
 * entry, otherwise return NULL.
 * @prev_entry: extent before @fofs
 * @next_entry: extent after @fofs
 * @insert_p: insert point for a new extent at @fofs,
 *            to simplify the insertion afterwards.
 * The tree must stay unchanged between lookup and insertion.
 */
static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
				struct extent_node *cached_en,
				unsigned int fofs,
				struct extent_node **prev_entry,
				struct extent_node **next_entry,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent,
				bool *leftmost)
{
	struct rb_node **pnode = &root->rb_root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct extent_node *en = cached_en;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_entry = NULL;
	*next_entry = NULL;

	if (RB_EMPTY_ROOT(&root->rb_root))
		return NULL;

	if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
		goto lookup_neighbors;

	*leftmost = true;

	while (*pnode) {
		parent = *pnode;
		en = rb_entry(*pnode, struct extent_node, rb_node);

		if (fofs < en->ei.fofs) {
			pnode = &(*pnode)->rb_left;
		} else if (fofs >= en->ei.fofs + en->ei.len) {
			pnode = &(*pnode)->rb_right;
			*leftmost = false;
		} else {
			goto lookup_neighbors;
		}
	}

	*insert_p = pnode;
	*insert_parent = parent;

	en = rb_entry(parent, struct extent_node, rb_node);
	tmp_node = parent;
	if (parent && fofs > en->ei.fofs)
		tmp_node = rb_next(parent);
	*next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);

	tmp_node = parent;
	if (parent && fofs < en->ei.fofs)
		tmp_node = rb_prev(parent);
	*prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
	return NULL;

lookup_neighbors:
	if (fofs == en->ei.fofs) {
		/* lookup prev node for merging backward later */
		tmp_node = rb_prev(&en->rb_node);
		*prev_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	if (fofs == en->ei.fofs + en->ei.len - 1) {
		/* lookup next node for merging frontward later */
		tmp_node = rb_next(&en->rb_node);
		*next_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	return en;
}

static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;

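/*
 * Allocate an extent_node for @ei and link it at @parent/@p in the rb-tree.
 * GFP_ATOMIC is used because callers hold et->lock for writing.
 * Returns NULL if the allocation fails.
 */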
static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en;

	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
	if (!en)
		return NULL;

	en->ei = *ei;
	INIT_LIST_HEAD(&en->list);
	en->et = et;

	rb_link_node(&en->rb_node, parent, p);
	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
	atomic_inc(&et->node_cnt);
	atomic_inc(&eti->total_ext_node);
	return en;
}

static void __detach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	rb_erase_cached(&en->rb_node, &et->root);
	atomic_dec(&et->node_cnt);
	atomic_dec(&eti->total_ext_node);

	if (et->cached_en == en)
		et->cached_en = NULL;
	kmem_cache_free(extent_node_slab, en);
}

/*
 * Flow to release an extent_node:
 * 1. list_del_init
 * 2. __detach_extent_node
 * 3. kmem_cache_free.
 */
static void __release_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	spin_lock(&eti->extent_lock);
	f2fs_bug_on(sbi, list_empty(&en->list));
	list_del_init(&en->list);
	spin_unlock(&eti->extent_lock);

	__detach_extent_node(sbi, et, en);
}

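/*
 * Return the extent tree of @type for @inode, creating it on first use and
 * reviving it from the zombie list if it survived a previous eviction.
 */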
static struct extent_tree *__grab_extent_tree(struct inode *inode,
						enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et;
	nid_t ino = inode->i_ino;

	mutex_lock(&eti->extent_tree_lock);
	et = radix_tree_lookup(&eti->extent_tree_root, ino);
	if (!et) {
		et = f2fs_kmem_cache_alloc(extent_tree_slab,
					GFP_NOFS, true, NULL);
		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
		memset(et, 0, sizeof(struct extent_tree));
		et->ino = ino;
		et->type = type;
		et->root = RB_ROOT_CACHED;
		et->cached_en = NULL;
		rwlock_init(&et->lock);
		INIT_LIST_HEAD(&et->list);
		atomic_set(&et->node_cnt, 0);
		atomic_inc(&eti->total_ext_tree);
	} else {
		atomic_dec(&eti->total_zombie_tree);
		list_del_init(&et->list);
	}
	mutex_unlock(&eti->extent_tree_lock);

	/* never dies until evict_inode */
	F2FS_I(inode)->extent_tree[type] = et;

	return et;
}

static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
					struct extent_tree *et)
{
	struct rb_node *node, *next;
	struct extent_node *en;
	unsigned int count = atomic_read(&et->node_cnt);

	node = rb_first_cached(&et->root);
	while (node) {
		next = rb_next(node);
		en = rb_entry(node, struct extent_node, rb_node);
		__release_extent_node(sbi, et, en);
		node = next;
	}

	return count - atomic_read(&et->node_cnt);
}

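/*
 * Invalidate the cached largest extent if it overlaps the range
 * [@fofs, @fofs + @len).
 */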
static void __drop_largest_extent(struct extent_tree *et,
					pgoff_t fofs, unsigned int len)
{
	if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
			fofs + len > et->largest.fofs) {
		et->largest.len = 0;
		et->largest_updated = true;
	}
}

void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
	struct extent_tree *et;
	struct extent_node *en;
	struct extent_info ei;

	if (!__may_extent_tree(inode, EX_READ)) {
		/* drop largest read extent */
		if (i_ext->len) {
			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
			i_ext->len = 0;
			set_page_dirty(ipage);
		}
		set_inode_flag(inode, FI_NO_EXTENT);
		return;
	}

	et = __grab_extent_tree(inode, EX_READ);

	get_read_extent_info(&ei, i_ext);

	write_lock(&et->lock);
	if (atomic_read(&et->node_cnt) || !ei.len)
		goto skip;

	en = __attach_extent_node(sbi, et, &ei, NULL,
				&et->root.rb_root.rb_node, true);
	if (en) {
		et->largest = en->ei;
		et->cached_en = en;

		spin_lock(&eti->extent_lock);
		list_add_tail(&en->list, &eti->extent_list);
		spin_unlock(&eti->extent_lock);
	}
skip:
	/* Drop the largest extent if the checkpoint got corrupted. */
	if (f2fs_cp_error(sbi)) {
		et->largest.len = 0;
		et->largest_updated = true;
	}
	write_unlock(&et->lock);
}

void f2fs_init_age_extent_tree(struct inode *inode)
{
	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
		return;
	__grab_extent_tree(inode, EX_BLOCK_AGE);
}

void f2fs_init_extent_tree(struct inode *inode)
{
	/* initialize read cache */
	if (__init_may_extent_tree(inode, EX_READ))
		__grab_extent_tree(inode, EX_READ);

	/* initialize block age cache */
	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
		__grab_extent_tree(inode, EX_BLOCK_AGE);
}

static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
			struct extent_info *ei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en;
	bool ret = false;

	if (!et)
		return false;

	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);

	read_lock(&et->lock);

	if (type == EX_READ &&
			et->largest.fofs <= pgofs &&
			(pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
		*ei = et->largest;
		ret = true;
		stat_inc_largest_node_hit(sbi);
		goto out;
	}

	en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
	if (!en)
		goto out;

	if (en == et->cached_en)
		stat_inc_cached_node_hit(sbi, type);
	else
		stat_inc_rbtree_node_hit(sbi, type);

	*ei = en->ei;
	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	ret = true;
out:
	stat_inc_total_hit(sbi, type);
	read_unlock(&et->lock);

	if (type == EX_READ)
		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
	return ret;
}

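/*
 * Try to merge @ei into its neighbours: extend @prev_ex forward, extend
 * @next_ex backward, or, when @ei bridges both, fold everything into
 * @next_ex and release @prev_ex. Returns the merged node, or NULL if no
 * merge was possible.
 */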
static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct extent_node *prev_ex,
				struct extent_node *next_ex)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en = NULL;

	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
		prev_ex->ei.len += ei->len;
		ei = &prev_ex->ei;
		en = prev_ex;
	}

	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
		next_ex->ei.fofs = ei->fofs;
		next_ex->ei.len += ei->len;
		if (et->type == EX_READ)
			next_ex->ei.blk = ei->blk;
		if (en)
			__release_extent_node(sbi, et, prev_ex);

		en = next_ex;
	}

	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	return en;
}

static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node **insert_p,
				struct rb_node *insert_parent,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct rb_node **p = &et->root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_node *en = NULL;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	leftmost = true;

	/* look up extent_node in the rb tree */
	while (*p) {
		parent = *p;
		en = rb_entry(parent, struct extent_node, rb_node);

		if (ei->fofs < en->ei.fofs) {
			p = &(*p)->rb_left;
		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			f2fs_bug_on(sbi, 1);
		}
	}

do_insert:
	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	/* update in global extent list */
	spin_lock(&eti->extent_lock);
	list_add_tail(&en->list, &eti->extent_list);
	et->cached_en = en;
	spin_unlock(&eti->extent_lock);
	return en;
}

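/*
 * Merge the range [@tei->fofs, @tei->fofs + @tei->len) into the extent
 * tree: existing nodes overlapping the range are trimmed, split in two,
 * or removed, then the new extent is merged with its neighbours or
 * inserted as a fresh node. For read extents, repeated splits of small
 * extents make the inode fall back to FI_NO_EXTENT.
 */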
static void __update_extent_tree_range(struct inode *inode,
			struct extent_info *tei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en = NULL, *en1 = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei, dei, prev;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int fofs = tei->fofs, len = tei->len;
	unsigned int end = fofs + len;
	bool updated = false;
	bool leftmost = false;

	if (!et)
		return;

	if (type == EX_READ)
		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
						tei->blk, 0);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
						tei->age, tei->last_blocks);

	write_lock(&et->lock);

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
			write_unlock(&et->lock);
			return;
		}

		prev = et->largest;
		dei.len = 0;

		/*
		 * drop largest extent before lookup, in case it's already
		 * been shrunk from extent tree
		 */
		__drop_largest_extent(et, fofs, len);
	}

	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (!en)
		en = next_en;

	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
	while (en && en->ei.fofs < end) {
		unsigned int org_end;
		int parts = 0;	/* # of parts current extent split into */

		next_en = en1 = NULL;

		dei = en->ei;
		org_end = dei.fofs + dei.len;
		f2fs_bug_on(sbi, fofs >= org_end);

		if (fofs > dei.fofs && (type != EX_READ ||
				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
			en->ei.len = fofs - en->ei.fofs;
			prev_en = en;
			parts = 1;
		}

		if (end < org_end && (type != EX_READ ||
				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
			if (parts) {
				__set_extent_info(&ei,
					end, org_end - end,
					end - dei.fofs + dei.blk, false,
					dei.age, dei.last_blocks,
					type);
				en1 = __insert_extent_tree(sbi, et, &ei,
							NULL, NULL, true);
				next_en = en1;
			} else {
				__set_extent_info(&en->ei,
					end, en->ei.len - (end - dei.fofs),
					en->ei.blk + (end - dei.fofs), true,
					dei.age, dei.last_blocks,
					type);
				next_en = en;
			}
			parts++;
		}

		if (!next_en) {
			struct rb_node *node = rb_next(&en->rb_node);

			next_en = rb_entry_safe(node, struct extent_node,
						rb_node);
		}

		if (parts)
			__try_update_largest_extent(et, en);
		else
			__release_extent_node(sbi, et, en);

		/*
		 * If the original extent was split into zero or two parts,
		 * the tree has been altered by deletion or insertion, so
		 * invalidate the cached insert-position pointers.
		 */
		if (parts != 1) {
			insert_p = NULL;
			insert_parent = NULL;
		}
		en = next_en;
	}

	if (type == EX_BLOCK_AGE)
		goto update_age_extent_cache;

	/* 3. update extent in read extent cache */
	BUG_ON(type != EX_READ);

	if (tei->blk) {
		__set_extent_info(&ei, fofs, len, tei->blk, false,
				0, 0, EX_READ);
		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
			__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);

		/* give up extent_cache, if split and small updates happen */
		if (dei.len >= 1 &&
				prev.len < F2FS_MIN_EXTENT_LEN &&
				et->largest.len < F2FS_MIN_EXTENT_LEN) {
			et->largest.len = 0;
			et->largest_updated = true;
			set_inode_flag(inode, FI_NO_EXTENT);
		}
	}

	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		__free_extent_tree(sbi, et);

	if (et->largest_updated) {
		et->largest_updated = false;
		updated = true;
	}
	goto out_read_extent_cache;
update_age_extent_cache:
	if (!tei->last_blocks)
		goto out_read_extent_cache;

	__set_extent_info(&ei, fofs, len, 0, false,
			tei->age, tei->last_blocks, EX_BLOCK_AGE);
	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);
out_read_extent_cache:
	write_unlock(&et->lock);

	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}

#ifdef CONFIG_F2FS_FS_COMPRESSION
void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
				pgoff_t fofs, block_t blkaddr, unsigned int llen,
				unsigned int c_len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
	struct extent_node *en = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	bool leftmost = false;

	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
						blkaddr, c_len);

	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return;

	write_lock(&et->lock);

	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (en)
		goto unlock_out;

	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
	ei.c_len = c_len;

	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
				insert_p, insert_parent, leftmost);
unlock_out:
	write_unlock(&et->lock);
}
#endif

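/*
 * Blend a new age sample with the previous one:
 *
 *	res = new * (100 - last_age_weight) / 100 + old * last_age_weight / 100
 *
 * Each operand is split into quotient and remainder with div_u64_rem() so
 * the multiplication by the weight cannot overflow 64 bits.
 */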
static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
						unsigned long long new,
						unsigned long long old)
{
	unsigned int rem_old, rem_new;
	unsigned long long res;
	unsigned int weight = sbi->last_age_weight;

	res = div_u64_rem(new, 100, &rem_new) * (100 - weight)
		+ div_u64_rem(old, 100, &rem_old) * weight;

	if (rem_new)
		res += rem_new * (100 - weight) / 100;
	if (rem_old)
		res += rem_old * weight / 100;

	return res;
}

/* This returns a new age and allocated blocks in ei */
static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
						block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t f_size = i_size_read(inode);
	unsigned long long cur_blocks =
				atomic64_read(&sbi->allocated_data_blocks);
	struct extent_info tei = *ei;	/* only fofs and len are valid */

	/*
	 * When I/O is not aligned to PAGE_SIZE, the last file block is
	 * updated even during sequential writes, so don't record an age
	 * for the new last block here.
	 */
	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
			blkaddr == NEW_ADDR)
		return -EINVAL;

	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
		unsigned long long cur_age;

		if (cur_blocks >= tei.last_blocks)
			cur_age = cur_blocks - tei.last_blocks;
		else
			/* allocated_data_blocks overflow */
			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;

		if (tei.age)
			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
		else
			ei->age = cur_age;
		ei->last_blocks = cur_blocks;
		WARN_ON(ei->age > cur_blocks);
		return 0;
	}

	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);

	/* the data block was allocated for the first time */
	if (blkaddr == NEW_ADDR)
		goto out;

	if (__is_valid_data_blkaddr(blkaddr) &&
	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
		return -EINVAL;
out:
	/*
	 * Initialize the block age to zero; this can happen when the
	 * block-age extent was reclaimed due to memory pressure or
	 * a system reboot.
	 */
	ei->age = 0;
	ei->last_blocks = cur_blocks;
	return 0;
}

static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
{
	struct extent_info ei = {};

	if (!__may_extent_tree(dn->inode, type))
		return;

	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
								dn->ofs_in_node;
	ei.len = 1;

	if (type == EX_READ) {
		if (dn->data_blkaddr == NEW_ADDR)
			ei.blk = NULL_ADDR;
		else
			ei.blk = dn->data_blkaddr;
	} else if (type == EX_BLOCK_AGE) {
		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
			return;
	}
	__update_extent_tree_range(dn->inode, &ei, type);
}

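/*
 * Reclaim up to @nr_shrink objects: first whole zombie trees (trees whose
 * inodes were evicted), then the oldest extent nodes from the LRU list.
 * Returns the number of trees plus nodes freed.
 */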
static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
				enum extent_type type)
{
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et, *next;
	struct extent_node *en;
	unsigned int node_cnt = 0, tree_cnt = 0;
	int remained;

	if (!atomic_read(&eti->total_zombie_tree))
		goto free_node;

	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	/* 1. remove unreferenced extent trees */
	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
		if (atomic_read(&et->node_cnt)) {
			write_lock(&et->lock);
			node_cnt += __free_extent_tree(sbi, et);
			write_unlock(&et->lock);
		}
		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
		list_del_init(&et->list);
		radix_tree_delete(&eti->extent_tree_root, et->ino);
		kmem_cache_free(extent_tree_slab, et);
		atomic_dec(&eti->total_ext_tree);
		atomic_dec(&eti->total_zombie_tree);
		tree_cnt++;

		if (node_cnt + tree_cnt >= nr_shrink)
			goto unlock_out;
		cond_resched();
	}
	mutex_unlock(&eti->extent_tree_lock);

free_node:
	/* 2. remove LRU extent entries */
	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	remained = nr_shrink - (node_cnt + tree_cnt);

	spin_lock(&eti->extent_lock);
	for (; remained > 0; remained--) {
		if (list_empty(&eti->extent_list))
			break;
		en = list_first_entry(&eti->extent_list,
					struct extent_node, list);
		et = en->et;
		if (!write_trylock(&et->lock)) {
			/* refresh this extent node's position in extent list */
			list_move_tail(&en->list, &eti->extent_list);
			continue;
		}

		list_del_init(&en->list);
		spin_unlock(&eti->extent_lock);

		__detach_extent_node(sbi, et, en);

		write_unlock(&et->lock);
		node_cnt++;
		spin_lock(&eti->extent_lock);
	}
	spin_unlock(&eti->extent_lock);

unlock_out:
	mutex_unlock(&eti->extent_tree_lock);
out:
	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);

	return node_cnt + tree_cnt;
}

/* read extent cache operations */
bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_READ))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
}

bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
				block_t *blkaddr)
{
	struct extent_info ei = {};

	if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
		return false;
	*blkaddr = ei.blk + index - ei.fofs;
	return true;
}

void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_READ);
}

void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, block_t blkaddr, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
		.blk = blkaddr,
	};

	if (!__may_extent_tree(dn->inode, EX_READ))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_READ);
}

unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, READ_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
}

/* block age extent cache operations */
bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
	};

	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
}

unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, AGE_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
}

static unsigned int __destroy_extent_node(struct inode *inode,
					enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et || !atomic_read(&et->node_cnt))
		return 0;

	write_lock(&et->lock);
	node_cnt = __free_extent_tree(sbi, et);
	write_unlock(&et->lock);

	return node_cnt;
}

void f2fs_destroy_extent_node(struct inode *inode)
{
	__destroy_extent_node(inode, EX_READ);
	__destroy_extent_node(inode, EX_BLOCK_AGE);
}

static void __drop_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	bool updated = false;

	if (!__may_extent_tree(inode, type))
		return;

	write_lock(&et->lock);
	__free_extent_tree(sbi, et);
	if (type == EX_READ) {
		set_inode_flag(inode, FI_NO_EXTENT);
		if (et->largest.len) {
			et->largest.len = 0;
			updated = true;
		}
	}
	write_unlock(&et->lock);
	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}

void f2fs_drop_extent_tree(struct inode *inode)
{
	__drop_extent_tree(inode, EX_READ);
	__drop_extent_tree(inode, EX_BLOCK_AGE);
}

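/*
 * Tear down the extent tree at inode eviction. A tree that still has
 * nodes and belongs to a live, healthy inode is parked on the zombie
 * list for the shrinker; otherwise it is freed immediately.
 */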
static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et)
		return;

	if (inode->i_nlink && !is_bad_inode(inode) &&
					atomic_read(&et->node_cnt)) {
		mutex_lock(&eti->extent_tree_lock);
		list_add_tail(&et->list, &eti->zombie_list);
		atomic_inc(&eti->total_zombie_tree);
		mutex_unlock(&eti->extent_tree_lock);
		return;
	}

	/* free all extent info belonging to this extent tree */
	node_cnt = __destroy_extent_node(inode, type);

	/* delete extent tree entry in radix tree */
	mutex_lock(&eti->extent_tree_lock);
	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
	kmem_cache_free(extent_tree_slab, et);
	atomic_dec(&eti->total_ext_tree);
	mutex_unlock(&eti->extent_tree_lock);

	F2FS_I(inode)->extent_tree[type] = NULL;

	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
}

void f2fs_destroy_extent_tree(struct inode *inode)
{
	__destroy_extent_tree(inode, EX_READ);
	__destroy_extent_tree(inode, EX_BLOCK_AGE);
}

static void __init_extent_tree_info(struct extent_tree_info *eti)
{
	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
	mutex_init(&eti->extent_tree_lock);
	INIT_LIST_HEAD(&eti->extent_list);
	spin_lock_init(&eti->extent_lock);
	atomic_set(&eti->total_ext_tree, 0);
	INIT_LIST_HEAD(&eti->zombie_list);
	atomic_set(&eti->total_zombie_tree, 0);
	atomic_set(&eti->total_ext_node, 0);
}

void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
{
	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);

	/* initialize for block age extents */
	atomic64_set(&sbi->allocated_data_blocks, 0);
	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
	sbi->last_age_weight = LAST_AGE_WEIGHT;
}

int __init f2fs_create_extent_cache(void)
{
	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
			sizeof(struct extent_tree));
	if (!extent_tree_slab)
		return -ENOMEM;
	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
			sizeof(struct extent_node));
	if (!extent_node_slab) {
		kmem_cache_destroy(extent_tree_slab);
		return -ENOMEM;
	}
	return 0;
}

void f2fs_destroy_extent_cache(void)
{
	kmem_cache_destroy(extent_node_slab);
	kmem_cache_destroy(extent_tree_slab);
}