xref: /linux/fs/f2fs/extent_cache.c (revision 7a5f1cd22d47f8ca4b760b6334378ae42c1bd24b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * f2fs extent cache support
4  *
5  * Copyright (c) 2015 Motorola Mobility
6  * Copyright (c) 2015 Samsung Electronics
7  * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
8  *          Chao Yu <chao2.yu@samsung.com>
9  *
10  * block_age-based extent cache added by:
11  * Copyright (c) 2022 xiaomi Co., Ltd.
12  *             http://www.xiaomi.com/
13  */
14 
15 #include <linux/fs.h>
16 #include <linux/f2fs_fs.h>
17 
18 #include "f2fs.h"
19 #include "node.h"
20 #include <trace/events/f2fs.h>
21 
22 bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio)
23 {
24 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
25 	struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext;
26 	struct extent_info ei;
27 	int devi;
28 
29 	get_read_extent_info(&ei, i_ext);
30 
31 	if (!ei.len)
32 		return true;
33 
34 	if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) ||
35 	    !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1,
36 					DATA_GENERIC_ENHANCE)) {
37 		f2fs_warn(sbi, "%s: inode (ino=%llx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
38 			  __func__, inode->i_ino,
39 			  ei.blk, ei.fofs, ei.len);
40 		return false;
41 	}
42 
43 	if (!IS_DEVICE_ALIASING(inode))
44 		return true;
45 
46 	for (devi = 0; devi < sbi->s_ndevs; devi++) {
47 		if (FDEV(devi).start_blk != ei.blk ||
48 				FDEV(devi).end_blk != ei.blk + ei.len - 1)
49 			continue;
50 
51 		if (devi == 0) {
52 			f2fs_warn(sbi,
53 			    "%s: inode (ino=%llx) is an alias of meta device",
54 			    __func__, inode->i_ino);
55 			return false;
56 		}
57 
58 		if (bdev_is_zoned(FDEV(devi).bdev)) {
59 			f2fs_warn(sbi,
60 			    "%s: device alias inode (ino=%llx)'s extent info "
61 			    "[%u, %u, %u] maps to zoned block device",
62 			    __func__, inode->i_ino, ei.blk, ei.fofs, ei.len);
63 			return false;
64 		}
65 		return true;
66 	}
67 
68 	f2fs_warn(sbi, "%s: device alias inode (ino=%llx)'s extent info "
69 			"[%u, %u, %u] is inconsistent w/ any devices",
70 			__func__, inode->i_ino, ei.blk, ei.fofs, ei.len);
71 	return false;
72 }
73 
74 static void __set_extent_info(struct extent_info *ei,
75 				unsigned int fofs, unsigned int len,
76 				block_t blk, bool keep_clen,
77 				unsigned long age, unsigned long last_blocks,
78 				enum extent_type type)
79 {
80 	ei->fofs = fofs;
81 	ei->len = len;
82 
83 	if (type == EX_READ) {
84 		ei->blk = blk;
85 		if (keep_clen)
86 			return;
87 #ifdef CONFIG_F2FS_FS_COMPRESSION
88 		ei->c_len = 0;
89 #endif
90 	} else if (type == EX_BLOCK_AGE) {
91 		ei->age = age;
92 		ei->last_blocks = last_blocks;
93 	}
94 }
95 
96 static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
97 {
98 	if (type == EX_READ)
99 		return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
100 			S_ISREG(inode->i_mode);
101 	if (type == EX_BLOCK_AGE)
102 		return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
103 			(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
104 	return false;
105 }
106 
107 static bool __may_extent_tree(struct inode *inode, enum extent_type type)
108 {
109 	if (IS_DEVICE_ALIASING(inode) && type == EX_READ)
110 		return true;
111 
112 	/*
113 	 * for recovered files during mount do not create extents
114 	 * if shrinker is not registered.
115 	 */
116 	if (list_empty(&F2FS_I_SB(inode)->s_list))
117 		return false;
118 
119 	if (!__init_may_extent_tree(inode, type))
120 		return false;
121 
122 	if (is_inode_flag_set(inode, FI_NO_EXTENT))
123 		return false;
124 
125 	if (type == EX_READ) {
126 		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
127 				 !f2fs_sb_has_readonly(F2FS_I_SB(inode)))
128 			return false;
129 	} else if (type == EX_BLOCK_AGE) {
130 		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
131 			return false;
132 		if (file_is_cold(inode))
133 			return false;
134 	}
135 	return true;
136 }
137 
138 static void __try_update_largest_extent(struct extent_tree *et,
139 						struct extent_node *en)
140 {
141 	if (et->type != EX_READ)
142 		return;
143 	if (en->ei.len <= et->largest.len)
144 		return;
145 
146 	et->largest = en->ei;
147 	et->largest_updated = true;
148 }
149 
150 static bool __is_extent_mergeable(struct extent_info *back,
151 		struct extent_info *front, enum extent_type type)
152 {
153 	if (type == EX_READ) {
154 #ifdef CONFIG_F2FS_FS_COMPRESSION
155 		if (back->c_len && back->len != back->c_len)
156 			return false;
157 		if (front->c_len && front->len != front->c_len)
158 			return false;
159 #endif
160 		return (back->fofs + back->len == front->fofs &&
161 				back->blk + back->len == front->blk);
162 	} else if (type == EX_BLOCK_AGE) {
163 		return (back->fofs + back->len == front->fofs &&
164 			abs(back->age - front->age) <= SAME_AGE_REGION &&
165 			abs(back->last_blocks - front->last_blocks) <=
166 							SAME_AGE_REGION);
167 	}
168 	return false;
169 }
170 
171 static bool __is_back_mergeable(struct extent_info *cur,
172 		struct extent_info *back, enum extent_type type)
173 {
174 	return __is_extent_mergeable(back, cur, type);
175 }
176 
177 static bool __is_front_mergeable(struct extent_info *cur,
178 		struct extent_info *front, enum extent_type type)
179 {
180 	return __is_extent_mergeable(cur, front, type);
181 }
182 
183 static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
184 			struct extent_node *cached_en, unsigned int fofs)
185 {
186 	struct rb_node *node = root->rb_root.rb_node;
187 	struct extent_node *en;
188 
189 	/* check a cached entry */
190 	if (cached_en && cached_en->ei.fofs <= fofs &&
191 			cached_en->ei.fofs + cached_en->ei.len > fofs)
192 		return cached_en;
193 
194 	/* check rb_tree */
195 	while (node) {
196 		en = rb_entry(node, struct extent_node, rb_node);
197 
198 		if (fofs < en->ei.fofs)
199 			node = node->rb_left;
200 		else if (fofs >= en->ei.fofs + en->ei.len)
201 			node = node->rb_right;
202 		else
203 			return en;
204 	}
205 	return NULL;
206 }
207 
208 /*
209  * lookup rb entry in position of @fofs in rb-tree,
210  * if hit, return the entry, otherwise, return NULL
211  * @prev_ex: extent before fofs
212  * @next_ex: extent after fofs
213  * @insert_p: insert point for new extent at fofs
214  * in order to simplify the insertion after.
215  * tree must stay unchanged between lookup and insertion.
216  */
217 static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
218 				struct extent_node *cached_en,
219 				unsigned int fofs,
220 				struct extent_node **prev_entry,
221 				struct extent_node **next_entry,
222 				struct rb_node ***insert_p,
223 				struct rb_node **insert_parent,
224 				bool *leftmost)
225 {
226 	struct rb_node **pnode = &root->rb_root.rb_node;
227 	struct rb_node *parent = NULL, *tmp_node;
228 	struct extent_node *en = cached_en;
229 
230 	*insert_p = NULL;
231 	*insert_parent = NULL;
232 	*prev_entry = NULL;
233 	*next_entry = NULL;
234 
235 	if (RB_EMPTY_ROOT(&root->rb_root))
236 		return NULL;
237 
238 	if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
239 		goto lookup_neighbors;
240 
241 	*leftmost = true;
242 
243 	while (*pnode) {
244 		parent = *pnode;
245 		en = rb_entry(*pnode, struct extent_node, rb_node);
246 
247 		if (fofs < en->ei.fofs) {
248 			pnode = &(*pnode)->rb_left;
249 		} else if (fofs >= en->ei.fofs + en->ei.len) {
250 			pnode = &(*pnode)->rb_right;
251 			*leftmost = false;
252 		} else {
253 			goto lookup_neighbors;
254 		}
255 	}
256 
257 	*insert_p = pnode;
258 	*insert_parent = parent;
259 
260 	en = rb_entry(parent, struct extent_node, rb_node);
261 	tmp_node = parent;
262 	if (parent && fofs > en->ei.fofs)
263 		tmp_node = rb_next(parent);
264 	*next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
265 
266 	tmp_node = parent;
267 	if (parent && fofs < en->ei.fofs)
268 		tmp_node = rb_prev(parent);
269 	*prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
270 	return NULL;
271 
272 lookup_neighbors:
273 	if (fofs == en->ei.fofs) {
274 		/* lookup prev node for merging backward later */
275 		tmp_node = rb_prev(&en->rb_node);
276 		*prev_entry = rb_entry_safe(tmp_node,
277 					struct extent_node, rb_node);
278 	}
279 	if (fofs == en->ei.fofs + en->ei.len - 1) {
280 		/* lookup next node for merging frontward later */
281 		tmp_node = rb_next(&en->rb_node);
282 		*next_entry = rb_entry_safe(tmp_node,
283 					struct extent_node, rb_node);
284 	}
285 	return en;
286 }
287 
/* slab cache that struct extent_node objects are allocated from */
static struct kmem_cache *extent_node_slab;
/* slab cache that struct extent_tree objects are allocated from */
static struct kmem_cache *extent_tree_slab;
290 
291 static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
292 				struct extent_tree *et, struct extent_info *ei,
293 				struct rb_node *parent, struct rb_node **p,
294 				bool leftmost)
295 {
296 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
297 	struct extent_node *en;
298 
299 	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
300 	if (!en)
301 		return NULL;
302 
303 	en->ei = *ei;
304 	INIT_LIST_HEAD(&en->list);
305 	en->et = et;
306 
307 	rb_link_node(&en->rb_node, parent, p);
308 	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
309 	atomic_inc(&et->node_cnt);
310 	atomic_inc(&eti->total_ext_node);
311 	return en;
312 }
313 
314 static void __detach_extent_node(struct f2fs_sb_info *sbi,
315 				struct extent_tree *et, struct extent_node *en)
316 {
317 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
318 
319 	rb_erase_cached(&en->rb_node, &et->root);
320 	atomic_dec(&et->node_cnt);
321 	atomic_dec(&eti->total_ext_node);
322 
323 	if (et->cached_en == en)
324 		et->cached_en = NULL;
325 	kmem_cache_free(extent_node_slab, en);
326 }
327 
328 /*
329  * Flow to release an extent_node:
330  * 1. list_del_init
331  * 2. __detach_extent_node
332  * 3. kmem_cache_free.
333  */
334 static void __release_extent_node(struct f2fs_sb_info *sbi,
335 			struct extent_tree *et, struct extent_node *en)
336 {
337 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
338 
339 	spin_lock(&eti->extent_lock);
340 	f2fs_bug_on(sbi, list_empty(&en->list));
341 	list_del_init(&en->list);
342 	spin_unlock(&eti->extent_lock);
343 
344 	__detach_extent_node(sbi, et, en);
345 }
346 
347 static struct extent_tree *__grab_extent_tree(struct inode *inode,
348 						enum extent_type type)
349 {
350 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
351 	struct extent_tree_info *eti = &sbi->extent_tree[type];
352 	struct extent_tree *et;
353 	nid_t ino = inode->i_ino;
354 
355 	mutex_lock(&eti->extent_tree_lock);
356 	et = radix_tree_lookup(&eti->extent_tree_root, ino);
357 	if (!et) {
358 		et = f2fs_kmem_cache_alloc(extent_tree_slab,
359 					GFP_NOFS, true, NULL);
360 		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
361 		memset(et, 0, sizeof(struct extent_tree));
362 		et->ino = ino;
363 		et->type = type;
364 		et->root = RB_ROOT_CACHED;
365 		et->cached_en = NULL;
366 		rwlock_init(&et->lock);
367 		INIT_LIST_HEAD(&et->list);
368 		atomic_set(&et->node_cnt, 0);
369 		atomic_inc(&eti->total_ext_tree);
370 	} else {
371 		atomic_dec(&eti->total_zombie_tree);
372 		list_del_init(&et->list);
373 	}
374 	mutex_unlock(&eti->extent_tree_lock);
375 
376 	/* never died until evict_inode */
377 	F2FS_I(inode)->extent_tree[type] = et;
378 
379 	return et;
380 }
381 
382 static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
383 				struct extent_tree *et, unsigned int nr_shrink)
384 {
385 	struct rb_node *node, *next;
386 	struct extent_node *en;
387 	unsigned int count;
388 
389 	node = rb_first_cached(&et->root);
390 
391 	for (count = 0; node && count < nr_shrink; count++) {
392 		next = rb_next(node);
393 		en = rb_entry(node, struct extent_node, rb_node);
394 		__release_extent_node(sbi, et, en);
395 		node = next;
396 	}
397 
398 	return count;
399 }
400 
401 static void __drop_largest_extent(struct extent_tree *et,
402 					pgoff_t fofs, unsigned int len)
403 {
404 	if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
405 			fofs + len > et->largest.fofs) {
406 		et->largest.len = 0;
407 		et->largest_updated = true;
408 	}
409 }
410 
411 void f2fs_init_read_extent_tree(struct inode *inode, struct folio *ifolio)
412 {
413 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
414 	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
415 	struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext;
416 	struct extent_tree *et;
417 	struct extent_node *en;
418 	struct extent_info ei = {0};
419 
420 	if (!__may_extent_tree(inode, EX_READ)) {
421 		/* drop largest read extent */
422 		if (i_ext->len) {
423 			f2fs_folio_wait_writeback(ifolio, NODE, true, true);
424 			i_ext->len = 0;
425 			folio_mark_dirty(ifolio);
426 		}
427 		set_inode_flag(inode, FI_NO_EXTENT);
428 		return;
429 	}
430 
431 	et = __grab_extent_tree(inode, EX_READ);
432 
433 	get_read_extent_info(&ei, i_ext);
434 
435 	write_lock(&et->lock);
436 	if (atomic_read(&et->node_cnt) || !ei.len)
437 		goto skip;
438 
439 	if (IS_DEVICE_ALIASING(inode)) {
440 		et->largest = ei;
441 		goto skip;
442 	}
443 
444 	en = __attach_extent_node(sbi, et, &ei, NULL,
445 				&et->root.rb_root.rb_node, true);
446 	if (en) {
447 		et->largest = en->ei;
448 		et->cached_en = en;
449 
450 		spin_lock(&eti->extent_lock);
451 		list_add_tail(&en->list, &eti->extent_list);
452 		spin_unlock(&eti->extent_lock);
453 	}
454 skip:
455 	/* Let's drop, if checkpoint got corrupted. */
456 	if (f2fs_cp_error(sbi)) {
457 		et->largest.len = 0;
458 		et->largest_updated = true;
459 	}
460 	write_unlock(&et->lock);
461 }
462 
463 void f2fs_init_age_extent_tree(struct inode *inode)
464 {
465 	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
466 		return;
467 	__grab_extent_tree(inode, EX_BLOCK_AGE);
468 }
469 
470 void f2fs_init_extent_tree(struct inode *inode)
471 {
472 	/* initialize read cache */
473 	if (__init_may_extent_tree(inode, EX_READ))
474 		__grab_extent_tree(inode, EX_READ);
475 
476 	/* initialize block age cache */
477 	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
478 		__grab_extent_tree(inode, EX_BLOCK_AGE);
479 }
480 
481 static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
482 			struct extent_info *ei, enum extent_type type)
483 {
484 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
485 	struct extent_tree_info *eti = &sbi->extent_tree[type];
486 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
487 	struct extent_node *en;
488 	bool ret = false;
489 
490 	if (!et)
491 		return false;
492 
493 	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);
494 
495 	read_lock(&et->lock);
496 
497 	if (type == EX_READ &&
498 			et->largest.fofs <= pgofs &&
499 			(pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
500 		*ei = et->largest;
501 		ret = true;
502 		stat_inc_largest_node_hit(sbi);
503 		goto out;
504 	}
505 
506 	if (IS_DEVICE_ALIASING(inode)) {
507 		ret = false;
508 		goto out;
509 	}
510 
511 	en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
512 	if (!en)
513 		goto out;
514 
515 	if (en == et->cached_en)
516 		stat_inc_cached_node_hit(sbi, type);
517 	else
518 		stat_inc_rbtree_node_hit(sbi, type);
519 
520 	*ei = en->ei;
521 	spin_lock(&eti->extent_lock);
522 	if (!list_empty(&en->list)) {
523 		list_move_tail(&en->list, &eti->extent_list);
524 		et->cached_en = en;
525 	}
526 	spin_unlock(&eti->extent_lock);
527 	ret = true;
528 out:
529 	stat_inc_total_hit(sbi, type);
530 	read_unlock(&et->lock);
531 
532 	if (type == EX_READ)
533 		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
534 	else if (type == EX_BLOCK_AGE)
535 		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
536 	return ret;
537 }
538 
539 static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
540 				struct extent_tree *et, struct extent_info *ei,
541 				struct extent_node *prev_ex,
542 				struct extent_node *next_ex)
543 {
544 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
545 	struct extent_node *en = NULL;
546 
547 	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
548 		prev_ex->ei.len += ei->len;
549 		ei = &prev_ex->ei;
550 		en = prev_ex;
551 	}
552 
553 	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
554 		next_ex->ei.fofs = ei->fofs;
555 		next_ex->ei.len += ei->len;
556 		if (et->type == EX_READ)
557 			next_ex->ei.blk = ei->blk;
558 		if (en)
559 			__release_extent_node(sbi, et, prev_ex);
560 
561 		en = next_ex;
562 	}
563 
564 	if (!en)
565 		return NULL;
566 
567 	__try_update_largest_extent(et, en);
568 
569 	spin_lock(&eti->extent_lock);
570 	if (!list_empty(&en->list)) {
571 		list_move_tail(&en->list, &eti->extent_list);
572 		et->cached_en = en;
573 	}
574 	spin_unlock(&eti->extent_lock);
575 	return en;
576 }
577 
578 static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
579 				struct extent_tree *et, struct extent_info *ei,
580 				struct rb_node **insert_p,
581 				struct rb_node *insert_parent,
582 				bool leftmost)
583 {
584 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
585 	struct rb_node **p = &et->root.rb_root.rb_node;
586 	struct rb_node *parent = NULL;
587 	struct extent_node *en = NULL;
588 
589 	if (insert_p && insert_parent) {
590 		parent = insert_parent;
591 		p = insert_p;
592 		goto do_insert;
593 	}
594 
595 	leftmost = true;
596 
597 	/* look up extent_node in the rb tree */
598 	while (*p) {
599 		parent = *p;
600 		en = rb_entry(parent, struct extent_node, rb_node);
601 
602 		if (ei->fofs < en->ei.fofs) {
603 			p = &(*p)->rb_left;
604 		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
605 			p = &(*p)->rb_right;
606 			leftmost = false;
607 		} else {
608 			f2fs_err_ratelimited(sbi, "%s: corrupted extent, type: %d, "
609 				"extent node in rb tree [%u, %u, %u], age [%llu, %llu], "
610 				"extent node to insert [%u, %u, %u], age [%llu, %llu]",
611 				__func__, et->type, en->ei.fofs, en->ei.blk, en->ei.len, en->ei.age,
612 				en->ei.last_blocks, ei->fofs, ei->blk, ei->len, ei->age, ei->last_blocks);
613 			f2fs_bug_on(sbi, 1);
614 			return NULL;
615 		}
616 	}
617 
618 do_insert:
619 	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
620 	if (!en)
621 		return NULL;
622 
623 	__try_update_largest_extent(et, en);
624 
625 	/* update in global extent list */
626 	spin_lock(&eti->extent_lock);
627 	list_add_tail(&en->list, &eti->extent_list);
628 	et->cached_en = en;
629 	spin_unlock(&eti->extent_lock);
630 	return en;
631 }
632 
633 static unsigned int __destroy_extent_node(struct inode *inode,
634 					enum extent_type type)
635 {
636 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
637 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
638 	unsigned int nr_shrink = type == EX_READ ?
639 				READ_EXTENT_CACHE_SHRINK_NUMBER :
640 				AGE_EXTENT_CACHE_SHRINK_NUMBER;
641 	unsigned int node_cnt = 0;
642 
643 	if (!et || !atomic_read(&et->node_cnt))
644 		return 0;
645 
646 	while (atomic_read(&et->node_cnt)) {
647 		write_lock(&et->lock);
648 		if (!is_inode_flag_set(inode, FI_NO_EXTENT))
649 			set_inode_flag(inode, FI_NO_EXTENT);
650 		node_cnt += __free_extent_tree(sbi, et, nr_shrink);
651 		write_unlock(&et->lock);
652 	}
653 
654 	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
655 
656 	return node_cnt;
657 }
658 
659 static void __update_extent_tree_range(struct inode *inode,
660 			struct extent_info *tei, enum extent_type type)
661 {
662 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
663 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
664 	struct extent_node *en = NULL, *en1 = NULL;
665 	struct extent_node *prev_en = NULL, *next_en = NULL;
666 	struct extent_info ei, dei, prev;
667 	struct rb_node **insert_p = NULL, *insert_parent = NULL;
668 	unsigned int fofs = tei->fofs, len = tei->len;
669 	unsigned int end = fofs + len;
670 	bool updated = false;
671 	bool leftmost = false;
672 
673 	if (!et)
674 		return;
675 
676 	if (unlikely(len == 0)) {
677 		f2fs_err_ratelimited(sbi, "%s: extent len is zero, type: %d, "
678 			"extent [%u, %u, %u], age [%llu, %llu]",
679 			__func__, type, tei->fofs, tei->blk, tei->len,
680 			tei->age, tei->last_blocks);
681 		f2fs_bug_on(sbi, 1);
682 		return;
683 	}
684 
685 	if (type == EX_READ)
686 		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
687 						tei->blk, 0);
688 	else if (type == EX_BLOCK_AGE)
689 		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
690 						tei->age, tei->last_blocks);
691 
692 	write_lock(&et->lock);
693 
694 	if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
695 		write_unlock(&et->lock);
696 		return;
697 	}
698 
699 	if (type == EX_READ) {
700 		prev = et->largest;
701 		dei.len = 0;
702 
703 		/*
704 		 * drop largest extent before lookup, in case it's already
705 		 * been shrunk from extent tree
706 		 */
707 		__drop_largest_extent(et, fofs, len);
708 	}
709 
710 	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
711 	en = __lookup_extent_node_ret(&et->root,
712 					et->cached_en, fofs,
713 					&prev_en, &next_en,
714 					&insert_p, &insert_parent,
715 					&leftmost);
716 	if (!en)
717 		en = next_en;
718 
719 	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
720 	while (en && en->ei.fofs < end) {
721 		unsigned int org_end;
722 		int parts = 0;	/* # of parts current extent split into */
723 
724 		next_en = en1 = NULL;
725 
726 		dei = en->ei;
727 		org_end = dei.fofs + dei.len;
728 		f2fs_bug_on(sbi, fofs >= org_end);
729 
730 		if (fofs > dei.fofs && (type != EX_READ ||
731 				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
732 			en->ei.len = fofs - en->ei.fofs;
733 			prev_en = en;
734 			parts = 1;
735 		}
736 
737 		if (end < org_end && (type != EX_READ ||
738 			(org_end - end >= F2FS_MIN_EXTENT_LEN &&
739 			atomic_read(&et->node_cnt) <
740 					sbi->max_read_extent_count))) {
741 			if (parts) {
742 				__set_extent_info(&ei,
743 					end, org_end - end,
744 					end - dei.fofs + dei.blk, false,
745 					dei.age, dei.last_blocks,
746 					type);
747 				en1 = __insert_extent_tree(sbi, et, &ei,
748 							NULL, NULL, true);
749 				next_en = en1;
750 			} else {
751 				__set_extent_info(&en->ei,
752 					end, en->ei.len - (end - dei.fofs),
753 					en->ei.blk + (end - dei.fofs), true,
754 					dei.age, dei.last_blocks,
755 					type);
756 				next_en = en;
757 			}
758 			parts++;
759 		}
760 
761 		if (!next_en) {
762 			struct rb_node *node = rb_next(&en->rb_node);
763 
764 			next_en = rb_entry_safe(node, struct extent_node,
765 						rb_node);
766 		}
767 
768 		if (parts)
769 			__try_update_largest_extent(et, en);
770 		else
771 			__release_extent_node(sbi, et, en);
772 
773 		/*
774 		 * if original extent is split into zero or two parts, extent
775 		 * tree has been altered by deletion or insertion, therefore
776 		 * invalidate pointers regard to tree.
777 		 */
778 		if (parts != 1) {
779 			insert_p = NULL;
780 			insert_parent = NULL;
781 		}
782 		en = next_en;
783 	}
784 
785 	if (type == EX_BLOCK_AGE)
786 		goto update_age_extent_cache;
787 
788 	/* 3. update extent in read extent cache */
789 	BUG_ON(type != EX_READ);
790 
791 	if (tei->blk) {
792 		__set_extent_info(&ei, fofs, len, tei->blk, false,
793 				  0, 0, EX_READ);
794 		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
795 			__insert_extent_tree(sbi, et, &ei,
796 					insert_p, insert_parent, leftmost);
797 
798 		/* give up extent_cache, if split and small updates happen */
799 		if (dei.len >= 1 &&
800 				prev.len < F2FS_MIN_EXTENT_LEN &&
801 				et->largest.len < F2FS_MIN_EXTENT_LEN) {
802 			et->largest.len = 0;
803 			et->largest_updated = true;
804 			set_inode_flag(inode, FI_NO_EXTENT);
805 		}
806 	}
807 
808 	if (et->largest_updated) {
809 		et->largest_updated = false;
810 		updated = true;
811 	}
812 	goto out_read_extent_cache;
813 update_age_extent_cache:
814 	if (tei->last_blocks == F2FS_EXTENT_AGE_INVALID)
815 		goto out_read_extent_cache;
816 
817 	__set_extent_info(&ei, fofs, len, 0, false,
818 			tei->age, tei->last_blocks, EX_BLOCK_AGE);
819 	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
820 		__insert_extent_tree(sbi, et, &ei,
821 					insert_p, insert_parent, leftmost);
822 out_read_extent_cache:
823 	write_unlock(&et->lock);
824 
825 	if (is_inode_flag_set(inode, FI_NO_EXTENT))
826 		__destroy_extent_node(inode, EX_READ);
827 
828 	if (updated)
829 		f2fs_mark_inode_dirty_sync(inode, true);
830 }
831 
#ifdef CONFIG_F2FS_FS_COMPRESSION
/*
 * Add a compressed extent to @inode's read extent tree.
 *
 * @fofs:    first file offset of the extent
 * @blkaddr: block address of the extent
 * @llen:    length stored as the extent's len
 * @c_len:   value stored as the extent's c_len
 *
 * Nothing is done when the inode has no read extent tree, when FI_NO_EXTENT
 * is set, or when an existing node already covers @fofs.  Otherwise the
 * extent is merged into a neighbour or inserted as a new node.
 */
void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
				pgoff_t fofs, block_t blkaddr, unsigned int llen,
				unsigned int c_len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
	struct extent_node *en = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	bool leftmost = false;

	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
						blkaddr, c_len);

	/*
	 * The tree pointer stays NULL when no read extent tree was grabbed
	 * for this inode; bail out like the other entry points do instead
	 * of dereferencing it for et->lock below.
	 */
	if (!et)
		return;

	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return;

	write_lock(&et->lock);

	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	/* an existing node already covers @fofs: leave the tree alone */
	if (en)
		goto unlock_out;

	/* keep_clen is set so that c_len is supplied explicitly right after */
	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
	ei.c_len = c_len;

	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
				insert_p, insert_parent, leftmost);
unlock_out:
	write_unlock(&et->lock);
}
#endif
872 
873 static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
874 						unsigned long long new,
875 						unsigned long long old)
876 {
877 	unsigned int rem_old, rem_new;
878 	unsigned long long res;
879 	unsigned int weight = sbi->last_age_weight;
880 
881 	res = div_u64_rem(new, 100, &rem_new) * (100 - weight)
882 		+ div_u64_rem(old, 100, &rem_old) * weight;
883 
884 	if (rem_new)
885 		res += rem_new * (100 - weight) / 100;
886 	if (rem_old)
887 		res += rem_old * weight / 100;
888 
889 	return res;
890 }
891 
892 /* This returns a new age and allocated blocks in ei */
893 static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
894 						block_t blkaddr)
895 {
896 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
897 	loff_t f_size = i_size_read(inode);
898 	unsigned long long cur_blocks =
899 				atomic64_read(&sbi->allocated_data_blocks);
900 	struct extent_info tei = *ei;	/* only fofs and len are valid */
901 
902 	/*
903 	 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last
904 	 * file block even in seq write. So don't record age for newly last file
905 	 * block here.
906 	 */
907 	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
908 			blkaddr == NEW_ADDR)
909 		return -EINVAL;
910 
911 	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
912 		unsigned long long cur_age;
913 
914 		if (cur_blocks >= tei.last_blocks)
915 			cur_age = cur_blocks - tei.last_blocks;
916 		else
917 			/* allocated_data_blocks overflow */
918 			cur_age = (ULLONG_MAX - 1) - tei.last_blocks + cur_blocks;
919 
920 		if (tei.age)
921 			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
922 		else
923 			ei->age = cur_age;
924 		ei->last_blocks = cur_blocks;
925 		WARN_ON(ei->age > cur_blocks);
926 		return 0;
927 	}
928 
929 	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
930 
931 	/* the data block was allocated for the first time */
932 	if (blkaddr == NEW_ADDR)
933 		goto out;
934 
935 	if (__is_valid_data_blkaddr(blkaddr) &&
936 	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
937 		return -EINVAL;
938 out:
939 	/*
940 	 * init block age with zero, this can happen when the block age extent
941 	 * was reclaimed due to memory constraint or system reboot
942 	 */
943 	ei->age = 0;
944 	ei->last_blocks = cur_blocks;
945 	return 0;
946 }
947 
948 static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
949 {
950 	struct extent_info ei = {};
951 
952 	if (!__may_extent_tree(dn->inode, type))
953 		return;
954 
955 	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), dn->inode) +
956 								dn->ofs_in_node;
957 	ei.len = 1;
958 
959 	if (type == EX_READ) {
960 		if (dn->data_blkaddr == NEW_ADDR)
961 			ei.blk = NULL_ADDR;
962 		else
963 			ei.blk = dn->data_blkaddr;
964 	} else if (type == EX_BLOCK_AGE) {
965 		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
966 			return;
967 	}
968 	__update_extent_tree_range(dn->inode, &ei, type);
969 }
970 
971 static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
972 					enum extent_type type)
973 {
974 	struct extent_tree_info *eti = &sbi->extent_tree[type];
975 	struct extent_tree *et, *next;
976 	struct extent_node *en;
977 	unsigned int node_cnt = 0, tree_cnt = 0;
978 	int remained;
979 
980 	if (!atomic_read(&eti->total_zombie_tree))
981 		goto free_node;
982 
983 	if (!mutex_trylock(&eti->extent_tree_lock))
984 		goto out;
985 
986 	/* 1. remove unreferenced extent tree */
987 	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
988 		if (atomic_read(&et->node_cnt)) {
989 			write_lock(&et->lock);
990 			node_cnt += __free_extent_tree(sbi, et,
991 					nr_shrink - node_cnt - tree_cnt);
992 			write_unlock(&et->lock);
993 		}
994 
995 		if (atomic_read(&et->node_cnt))
996 			goto unlock_out;
997 
998 		list_del_init(&et->list);
999 		radix_tree_delete(&eti->extent_tree_root, et->ino);
1000 		kmem_cache_free(extent_tree_slab, et);
1001 		atomic_dec(&eti->total_ext_tree);
1002 		atomic_dec(&eti->total_zombie_tree);
1003 		tree_cnt++;
1004 
1005 		if (node_cnt + tree_cnt >= nr_shrink)
1006 			goto unlock_out;
1007 		cond_resched();
1008 	}
1009 	mutex_unlock(&eti->extent_tree_lock);
1010 
1011 free_node:
1012 	/* 2. remove LRU extent entries */
1013 	if (!mutex_trylock(&eti->extent_tree_lock))
1014 		goto out;
1015 
1016 	remained = nr_shrink - (node_cnt + tree_cnt);
1017 
1018 	spin_lock(&eti->extent_lock);
1019 	for (; remained > 0; remained--) {
1020 		if (list_empty(&eti->extent_list))
1021 			break;
1022 		en = list_first_entry(&eti->extent_list,
1023 					struct extent_node, list);
1024 		et = en->et;
1025 		if (!write_trylock(&et->lock)) {
1026 			/* refresh this extent node's position in extent list */
1027 			list_move_tail(&en->list, &eti->extent_list);
1028 			continue;
1029 		}
1030 
1031 		list_del_init(&en->list);
1032 		spin_unlock(&eti->extent_lock);
1033 
1034 		__detach_extent_node(sbi, et, en);
1035 
1036 		write_unlock(&et->lock);
1037 		node_cnt++;
1038 		spin_lock(&eti->extent_lock);
1039 	}
1040 	spin_unlock(&eti->extent_lock);
1041 
1042 unlock_out:
1043 	mutex_unlock(&eti->extent_tree_lock);
1044 out:
1045 	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);
1046 
1047 	return node_cnt + tree_cnt;
1048 }
1049 
1050 /* read extent cache operations */
1051 bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
1052 				struct extent_info *ei)
1053 {
1054 	if (!__may_extent_tree(inode, EX_READ))
1055 		return false;
1056 
1057 	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
1058 }
1059 
1060 bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
1061 				block_t *blkaddr)
1062 {
1063 	struct extent_info ei = {};
1064 
1065 	if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
1066 		return false;
1067 	*blkaddr = ei.blk + index - ei.fofs;
1068 	return true;
1069 }
1070 
1071 void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
1072 {
1073 	return __update_extent_cache(dn, EX_READ);
1074 }
1075 
1076 void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
1077 				pgoff_t fofs, block_t blkaddr, unsigned int len)
1078 {
1079 	struct extent_info ei = {
1080 		.fofs = fofs,
1081 		.len = len,
1082 		.blk = blkaddr,
1083 	};
1084 
1085 	if (!__may_extent_tree(dn->inode, EX_READ))
1086 		return;
1087 
1088 	__update_extent_tree_range(dn->inode, &ei, EX_READ);
1089 }
1090 
1091 unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1092 {
1093 	if (!test_opt(sbi, READ_EXTENT_CACHE))
1094 		return 0;
1095 
1096 	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
1097 }
1098 
1099 /* block age extent cache operations */
1100 bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
1101 				struct extent_info *ei)
1102 {
1103 	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
1104 		return false;
1105 
1106 	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
1107 }
1108 
1109 void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
1110 {
1111 	return __update_extent_cache(dn, EX_BLOCK_AGE);
1112 }
1113 
1114 void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
1115 				pgoff_t fofs, unsigned int len)
1116 {
1117 	struct extent_info ei = {
1118 		.fofs = fofs,
1119 		.len = len,
1120 		.last_blocks = F2FS_EXTENT_AGE_INVALID,
1121 	};
1122 
1123 	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
1124 		return;
1125 
1126 	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
1127 }
1128 
1129 unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1130 {
1131 	if (!test_opt(sbi, AGE_EXTENT_CACHE))
1132 		return 0;
1133 
1134 	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
1135 }
1136 
1137 void f2fs_destroy_extent_node(struct inode *inode)
1138 {
1139 	__destroy_extent_node(inode, EX_READ);
1140 	__destroy_extent_node(inode, EX_BLOCK_AGE);
1141 }
1142 
1143 static void __drop_extent_tree(struct inode *inode, enum extent_type type)
1144 {
1145 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1146 	bool updated = false;
1147 
1148 	if (!__may_extent_tree(inode, type))
1149 		return;
1150 
1151 	write_lock(&et->lock);
1152 	if (type == EX_READ) {
1153 		set_inode_flag(inode, FI_NO_EXTENT);
1154 		if (et->largest.len) {
1155 			et->largest.len = 0;
1156 			updated = true;
1157 		}
1158 	}
1159 	write_unlock(&et->lock);
1160 
1161 	__destroy_extent_node(inode, type);
1162 
1163 	if (updated)
1164 		f2fs_mark_inode_dirty_sync(inode, true);
1165 }
1166 
1167 void f2fs_drop_extent_tree(struct inode *inode)
1168 {
1169 	__drop_extent_tree(inode, EX_READ);
1170 	__drop_extent_tree(inode, EX_BLOCK_AGE);
1171 }
1172 
1173 static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
1174 {
1175 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1176 	struct extent_tree_info *eti = &sbi->extent_tree[type];
1177 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1178 	unsigned int node_cnt = 0;
1179 
1180 	if (!et)
1181 		return;
1182 
1183 	if (inode->i_nlink && !is_bad_inode(inode) &&
1184 					atomic_read(&et->node_cnt)) {
1185 		mutex_lock(&eti->extent_tree_lock);
1186 		list_add_tail(&et->list, &eti->zombie_list);
1187 		atomic_inc(&eti->total_zombie_tree);
1188 		mutex_unlock(&eti->extent_tree_lock);
1189 		return;
1190 	}
1191 
1192 	/* free all extent info belong to this extent tree */
1193 	node_cnt = __destroy_extent_node(inode, type);
1194 
1195 	/* delete extent tree entry in radix tree */
1196 	mutex_lock(&eti->extent_tree_lock);
1197 	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
1198 	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
1199 	kmem_cache_free(extent_tree_slab, et);
1200 	atomic_dec(&eti->total_ext_tree);
1201 	mutex_unlock(&eti->extent_tree_lock);
1202 
1203 	F2FS_I(inode)->extent_tree[type] = NULL;
1204 
1205 	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
1206 }
1207 
1208 void f2fs_destroy_extent_tree(struct inode *inode)
1209 {
1210 	__destroy_extent_tree(inode, EX_READ);
1211 	__destroy_extent_tree(inode, EX_BLOCK_AGE);
1212 }
1213 
1214 static void __init_extent_tree_info(struct extent_tree_info *eti)
1215 {
1216 	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
1217 	mutex_init(&eti->extent_tree_lock);
1218 	INIT_LIST_HEAD(&eti->extent_list);
1219 	spin_lock_init(&eti->extent_lock);
1220 	atomic_set(&eti->total_ext_tree, 0);
1221 	INIT_LIST_HEAD(&eti->zombie_list);
1222 	atomic_set(&eti->total_zombie_tree, 0);
1223 	atomic_set(&eti->total_ext_node, 0);
1224 }
1225 
1226 void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
1227 {
1228 	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
1229 	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);
1230 
1231 	/* initialize for block age extents */
1232 	atomic64_set(&sbi->allocated_data_blocks, 0);
1233 	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
1234 	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
1235 	sbi->last_age_weight = LAST_AGE_WEIGHT;
1236 	sbi->max_read_extent_count = DEF_MAX_READ_EXTENT_COUNT;
1237 }
1238 
1239 int __init f2fs_create_extent_cache(void)
1240 {
1241 	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
1242 			sizeof(struct extent_tree));
1243 	if (!extent_tree_slab)
1244 		return -ENOMEM;
1245 	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
1246 			sizeof(struct extent_node));
1247 	if (!extent_node_slab) {
1248 		kmem_cache_destroy(extent_tree_slab);
1249 		return -ENOMEM;
1250 	}
1251 	return 0;
1252 }
1253 
1254 void f2fs_destroy_extent_cache(void)
1255 {
1256 	kmem_cache_destroy(extent_node_slab);
1257 	kmem_cache_destroy(extent_tree_slab);
1258 }
1259