xref: /linux/fs/f2fs/extent_cache.c (revision 906b77ca91c7e9833b4e47bedb6bec76be71d497)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * f2fs extent cache support
4  *
5  * Copyright (c) 2015 Motorola Mobility
6  * Copyright (c) 2015 Samsung Electronics
7  * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
8  *          Chao Yu <chao2.yu@samsung.com>
9  *
10  * block_age-based extent cache added by:
11  * Copyright (c) 2022 xiaomi Co., Ltd.
12  *             http://www.xiaomi.com/
13  */
14 
15 #include <linux/fs.h>
16 #include <linux/f2fs_fs.h>
17 
18 #include "f2fs.h"
19 #include "node.h"
20 #include <trace/events/f2fs.h>
21 
22 bool sanity_check_extent_cache(struct inode *inode, struct page *ipage)
23 {
24 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
25 	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
26 	struct extent_info ei;
27 
28 	get_read_extent_info(&ei, i_ext);
29 
30 	if (!ei.len)
31 		return true;
32 
33 	if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) ||
34 	    !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1,
35 					DATA_GENERIC_ENHANCE)) {
36 		f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
37 			  __func__, inode->i_ino,
38 			  ei.blk, ei.fofs, ei.len);
39 		return false;
40 	}
41 	return true;
42 }
43 
44 static void __set_extent_info(struct extent_info *ei,
45 				unsigned int fofs, unsigned int len,
46 				block_t blk, bool keep_clen,
47 				unsigned long age, unsigned long last_blocks,
48 				enum extent_type type)
49 {
50 	ei->fofs = fofs;
51 	ei->len = len;
52 
53 	if (type == EX_READ) {
54 		ei->blk = blk;
55 		if (keep_clen)
56 			return;
57 #ifdef CONFIG_F2FS_FS_COMPRESSION
58 		ei->c_len = 0;
59 #endif
60 	} else if (type == EX_BLOCK_AGE) {
61 		ei->age = age;
62 		ei->last_blocks = last_blocks;
63 	}
64 }
65 
66 static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
67 {
68 	if (type == EX_READ)
69 		return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
70 			S_ISREG(inode->i_mode);
71 	if (type == EX_BLOCK_AGE)
72 		return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
73 			(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
74 	return false;
75 }
76 
77 static bool __may_extent_tree(struct inode *inode, enum extent_type type)
78 {
79 	/*
80 	 * for recovered files during mount do not create extents
81 	 * if shrinker is not registered.
82 	 */
83 	if (list_empty(&F2FS_I_SB(inode)->s_list))
84 		return false;
85 
86 	if (!__init_may_extent_tree(inode, type))
87 		return false;
88 
89 	if (type == EX_READ) {
90 		if (is_inode_flag_set(inode, FI_NO_EXTENT))
91 			return false;
92 		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
93 				 !f2fs_sb_has_readonly(F2FS_I_SB(inode)))
94 			return false;
95 	} else if (type == EX_BLOCK_AGE) {
96 		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
97 			return false;
98 		if (file_is_cold(inode))
99 			return false;
100 	}
101 	return true;
102 }
103 
104 static void __try_update_largest_extent(struct extent_tree *et,
105 						struct extent_node *en)
106 {
107 	if (et->type != EX_READ)
108 		return;
109 	if (en->ei.len <= et->largest.len)
110 		return;
111 
112 	et->largest = en->ei;
113 	et->largest_updated = true;
114 }
115 
116 static bool __is_extent_mergeable(struct extent_info *back,
117 		struct extent_info *front, enum extent_type type)
118 {
119 	if (type == EX_READ) {
120 #ifdef CONFIG_F2FS_FS_COMPRESSION
121 		if (back->c_len && back->len != back->c_len)
122 			return false;
123 		if (front->c_len && front->len != front->c_len)
124 			return false;
125 #endif
126 		return (back->fofs + back->len == front->fofs &&
127 				back->blk + back->len == front->blk);
128 	} else if (type == EX_BLOCK_AGE) {
129 		return (back->fofs + back->len == front->fofs &&
130 			abs(back->age - front->age) <= SAME_AGE_REGION &&
131 			abs(back->last_blocks - front->last_blocks) <=
132 							SAME_AGE_REGION);
133 	}
134 	return false;
135 }
136 
137 static bool __is_back_mergeable(struct extent_info *cur,
138 		struct extent_info *back, enum extent_type type)
139 {
140 	return __is_extent_mergeable(back, cur, type);
141 }
142 
143 static bool __is_front_mergeable(struct extent_info *cur,
144 		struct extent_info *front, enum extent_type type)
145 {
146 	return __is_extent_mergeable(cur, front, type);
147 }
148 
149 static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
150 			struct extent_node *cached_en, unsigned int fofs)
151 {
152 	struct rb_node *node = root->rb_root.rb_node;
153 	struct extent_node *en;
154 
155 	/* check a cached entry */
156 	if (cached_en && cached_en->ei.fofs <= fofs &&
157 			cached_en->ei.fofs + cached_en->ei.len > fofs)
158 		return cached_en;
159 
160 	/* check rb_tree */
161 	while (node) {
162 		en = rb_entry(node, struct extent_node, rb_node);
163 
164 		if (fofs < en->ei.fofs)
165 			node = node->rb_left;
166 		else if (fofs >= en->ei.fofs + en->ei.len)
167 			node = node->rb_right;
168 		else
169 			return en;
170 	}
171 	return NULL;
172 }
173 
/*
 * Look up the extent node covering @fofs in the rb-tree and collect the
 * information a following insertion or merge needs.
 *
 * @root: tree to search
 * @cached_en: optional hint, tried before walking the tree
 * @fofs: file offset to look up
 * @prev_entry: set to the extent before @fofs, or NULL
 * @next_entry: set to the extent after @fofs, or NULL
 * @insert_p: on a miss, set to the link where a new node at @fofs goes
 * @insert_parent: on a miss, set to the parent of that link
 * @leftmost: written only when the tree is walked; true if the walk never
 *            descended to the right
 *
 * Returns the node containing @fofs, or NULL on a miss (including an empty
 * tree, in which case all output pointers are NULL).  On a hit, the
 * neighbours are only reported when @fofs sits on the first (prev) or the
 * last (next) offset of the node, i.e. when a merge could follow.
 *
 * The tree must stay unchanged between this lookup and the insertion that
 * uses @insert_p / @insert_parent.
 */
static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
				struct extent_node *cached_en,
				unsigned int fofs,
				struct extent_node **prev_entry,
				struct extent_node **next_entry,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent,
				bool *leftmost)
{
	struct rb_node **pnode = &root->rb_root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct extent_node *en = cached_en;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_entry = NULL;
	*next_entry = NULL;

	if (RB_EMPTY_ROOT(&root->rb_root))
		return NULL;

	/* the cached entry covers @fofs: skip the tree walk */
	if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
		goto lookup_neighbors;

	*leftmost = true;

	while (*pnode) {
		parent = *pnode;
		en = rb_entry(*pnode, struct extent_node, rb_node);

		if (fofs < en->ei.fofs) {
			pnode = &(*pnode)->rb_left;
		} else if (fofs >= en->ei.fofs + en->ei.len) {
			pnode = &(*pnode)->rb_right;
			*leftmost = false;
		} else {
			goto lookup_neighbors;
		}
	}

	/* miss: pnode is the empty link below parent where @fofs belongs */
	*insert_p = pnode;
	*insert_parent = parent;

	/*
	 * the tree is not empty, so the loop ran at least once and parent
	 * is the last node visited
	 */
	en = rb_entry(parent, struct extent_node, rb_node);
	tmp_node = parent;
	/* parent lies before @fofs: the next extent is its successor */
	if (parent && fofs > en->ei.fofs)
		tmp_node = rb_next(parent);
	*next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);

	tmp_node = parent;
	/* parent lies after @fofs: the previous extent is its predecessor */
	if (parent && fofs < en->ei.fofs)
		tmp_node = rb_prev(parent);
	*prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
	return NULL;

lookup_neighbors:
	if (fofs == en->ei.fofs) {
		/* lookup prev node for merging backward later */
		tmp_node = rb_prev(&en->rb_node);
		*prev_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	if (fofs == en->ei.fofs + en->ei.len - 1) {
		/* lookup next node for merging frontward later */
		tmp_node = rb_next(&en->rb_node);
		*next_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	return en;
}
253 
/* slab caches for struct extent_tree and struct extent_node objects */
static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;
256 
257 static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
258 				struct extent_tree *et, struct extent_info *ei,
259 				struct rb_node *parent, struct rb_node **p,
260 				bool leftmost)
261 {
262 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
263 	struct extent_node *en;
264 
265 	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
266 	if (!en)
267 		return NULL;
268 
269 	en->ei = *ei;
270 	INIT_LIST_HEAD(&en->list);
271 	en->et = et;
272 
273 	rb_link_node(&en->rb_node, parent, p);
274 	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
275 	atomic_inc(&et->node_cnt);
276 	atomic_inc(&eti->total_ext_node);
277 	return en;
278 }
279 
280 static void __detach_extent_node(struct f2fs_sb_info *sbi,
281 				struct extent_tree *et, struct extent_node *en)
282 {
283 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
284 
285 	rb_erase_cached(&en->rb_node, &et->root);
286 	atomic_dec(&et->node_cnt);
287 	atomic_dec(&eti->total_ext_node);
288 
289 	if (et->cached_en == en)
290 		et->cached_en = NULL;
291 	kmem_cache_free(extent_node_slab, en);
292 }
293 
294 /*
295  * Flow to release an extent_node:
296  * 1. list_del_init
297  * 2. __detach_extent_node
298  * 3. kmem_cache_free.
299  */
300 static void __release_extent_node(struct f2fs_sb_info *sbi,
301 			struct extent_tree *et, struct extent_node *en)
302 {
303 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
304 
305 	spin_lock(&eti->extent_lock);
306 	f2fs_bug_on(sbi, list_empty(&en->list));
307 	list_del_init(&en->list);
308 	spin_unlock(&eti->extent_lock);
309 
310 	__detach_extent_node(sbi, et, en);
311 }
312 
313 static struct extent_tree *__grab_extent_tree(struct inode *inode,
314 						enum extent_type type)
315 {
316 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
317 	struct extent_tree_info *eti = &sbi->extent_tree[type];
318 	struct extent_tree *et;
319 	nid_t ino = inode->i_ino;
320 
321 	mutex_lock(&eti->extent_tree_lock);
322 	et = radix_tree_lookup(&eti->extent_tree_root, ino);
323 	if (!et) {
324 		et = f2fs_kmem_cache_alloc(extent_tree_slab,
325 					GFP_NOFS, true, NULL);
326 		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
327 		memset(et, 0, sizeof(struct extent_tree));
328 		et->ino = ino;
329 		et->type = type;
330 		et->root = RB_ROOT_CACHED;
331 		et->cached_en = NULL;
332 		rwlock_init(&et->lock);
333 		INIT_LIST_HEAD(&et->list);
334 		atomic_set(&et->node_cnt, 0);
335 		atomic_inc(&eti->total_ext_tree);
336 	} else {
337 		atomic_dec(&eti->total_zombie_tree);
338 		list_del_init(&et->list);
339 	}
340 	mutex_unlock(&eti->extent_tree_lock);
341 
342 	/* never died until evict_inode */
343 	F2FS_I(inode)->extent_tree[type] = et;
344 
345 	return et;
346 }
347 
348 static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
349 					struct extent_tree *et)
350 {
351 	struct rb_node *node, *next;
352 	struct extent_node *en;
353 	unsigned int count = atomic_read(&et->node_cnt);
354 
355 	node = rb_first_cached(&et->root);
356 	while (node) {
357 		next = rb_next(node);
358 		en = rb_entry(node, struct extent_node, rb_node);
359 		__release_extent_node(sbi, et, en);
360 		node = next;
361 	}
362 
363 	return count - atomic_read(&et->node_cnt);
364 }
365 
366 static void __drop_largest_extent(struct extent_tree *et,
367 					pgoff_t fofs, unsigned int len)
368 {
369 	if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
370 			fofs + len > et->largest.fofs) {
371 		et->largest.len = 0;
372 		et->largest_updated = true;
373 	}
374 }
375 
376 void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
377 {
378 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
379 	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
380 	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
381 	struct extent_tree *et;
382 	struct extent_node *en;
383 	struct extent_info ei;
384 
385 	if (!__may_extent_tree(inode, EX_READ)) {
386 		/* drop largest read extent */
387 		if (i_ext->len) {
388 			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
389 			i_ext->len = 0;
390 			set_page_dirty(ipage);
391 		}
392 		set_inode_flag(inode, FI_NO_EXTENT);
393 		return;
394 	}
395 
396 	et = __grab_extent_tree(inode, EX_READ);
397 
398 	get_read_extent_info(&ei, i_ext);
399 
400 	write_lock(&et->lock);
401 	if (atomic_read(&et->node_cnt) || !ei.len)
402 		goto skip;
403 
404 	en = __attach_extent_node(sbi, et, &ei, NULL,
405 				&et->root.rb_root.rb_node, true);
406 	if (en) {
407 		et->largest = en->ei;
408 		et->cached_en = en;
409 
410 		spin_lock(&eti->extent_lock);
411 		list_add_tail(&en->list, &eti->extent_list);
412 		spin_unlock(&eti->extent_lock);
413 	}
414 skip:
415 	/* Let's drop, if checkpoint got corrupted. */
416 	if (f2fs_cp_error(sbi)) {
417 		et->largest.len = 0;
418 		et->largest_updated = true;
419 	}
420 	write_unlock(&et->lock);
421 }
422 
423 void f2fs_init_age_extent_tree(struct inode *inode)
424 {
425 	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
426 		return;
427 	__grab_extent_tree(inode, EX_BLOCK_AGE);
428 }
429 
430 void f2fs_init_extent_tree(struct inode *inode)
431 {
432 	/* initialize read cache */
433 	if (__init_may_extent_tree(inode, EX_READ))
434 		__grab_extent_tree(inode, EX_READ);
435 
436 	/* initialize block age cache */
437 	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
438 		__grab_extent_tree(inode, EX_BLOCK_AGE);
439 }
440 
441 static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
442 			struct extent_info *ei, enum extent_type type)
443 {
444 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
445 	struct extent_tree_info *eti = &sbi->extent_tree[type];
446 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
447 	struct extent_node *en;
448 	bool ret = false;
449 
450 	if (!et)
451 		return false;
452 
453 	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);
454 
455 	read_lock(&et->lock);
456 
457 	if (type == EX_READ &&
458 			et->largest.fofs <= pgofs &&
459 			(pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
460 		*ei = et->largest;
461 		ret = true;
462 		stat_inc_largest_node_hit(sbi);
463 		goto out;
464 	}
465 
466 	en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
467 	if (!en)
468 		goto out;
469 
470 	if (en == et->cached_en)
471 		stat_inc_cached_node_hit(sbi, type);
472 	else
473 		stat_inc_rbtree_node_hit(sbi, type);
474 
475 	*ei = en->ei;
476 	spin_lock(&eti->extent_lock);
477 	if (!list_empty(&en->list)) {
478 		list_move_tail(&en->list, &eti->extent_list);
479 		et->cached_en = en;
480 	}
481 	spin_unlock(&eti->extent_lock);
482 	ret = true;
483 out:
484 	stat_inc_total_hit(sbi, type);
485 	read_unlock(&et->lock);
486 
487 	if (type == EX_READ)
488 		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
489 	else if (type == EX_BLOCK_AGE)
490 		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
491 	return ret;
492 }
493 
494 static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
495 				struct extent_tree *et, struct extent_info *ei,
496 				struct extent_node *prev_ex,
497 				struct extent_node *next_ex)
498 {
499 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
500 	struct extent_node *en = NULL;
501 
502 	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
503 		prev_ex->ei.len += ei->len;
504 		ei = &prev_ex->ei;
505 		en = prev_ex;
506 	}
507 
508 	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
509 		next_ex->ei.fofs = ei->fofs;
510 		next_ex->ei.len += ei->len;
511 		if (et->type == EX_READ)
512 			next_ex->ei.blk = ei->blk;
513 		if (en)
514 			__release_extent_node(sbi, et, prev_ex);
515 
516 		en = next_ex;
517 	}
518 
519 	if (!en)
520 		return NULL;
521 
522 	__try_update_largest_extent(et, en);
523 
524 	spin_lock(&eti->extent_lock);
525 	if (!list_empty(&en->list)) {
526 		list_move_tail(&en->list, &eti->extent_list);
527 		et->cached_en = en;
528 	}
529 	spin_unlock(&eti->extent_lock);
530 	return en;
531 }
532 
533 static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
534 				struct extent_tree *et, struct extent_info *ei,
535 				struct rb_node **insert_p,
536 				struct rb_node *insert_parent,
537 				bool leftmost)
538 {
539 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
540 	struct rb_node **p = &et->root.rb_root.rb_node;
541 	struct rb_node *parent = NULL;
542 	struct extent_node *en = NULL;
543 
544 	if (insert_p && insert_parent) {
545 		parent = insert_parent;
546 		p = insert_p;
547 		goto do_insert;
548 	}
549 
550 	leftmost = true;
551 
552 	/* look up extent_node in the rb tree */
553 	while (*p) {
554 		parent = *p;
555 		en = rb_entry(parent, struct extent_node, rb_node);
556 
557 		if (ei->fofs < en->ei.fofs) {
558 			p = &(*p)->rb_left;
559 		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
560 			p = &(*p)->rb_right;
561 			leftmost = false;
562 		} else {
563 			f2fs_bug_on(sbi, 1);
564 		}
565 	}
566 
567 do_insert:
568 	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
569 	if (!en)
570 		return NULL;
571 
572 	__try_update_largest_extent(et, en);
573 
574 	/* update in global extent list */
575 	spin_lock(&eti->extent_lock);
576 	list_add_tail(&en->list, &eti->extent_list);
577 	et->cached_en = en;
578 	spin_unlock(&eti->extent_lock);
579 	return en;
580 }
581 
/*
 * Apply the update described by @tei to the @type extent tree of @inode.
 *
 * Every cached extent overlapping [tei->fofs, tei->fofs + tei->len) is
 * trimmed, split or released.  Afterwards a new extent for the range is
 * merged into its neighbours or inserted:
 *  - EX_READ: only when tei->blk is non-zero.  Remainders of a split that
 *    are shorter than F2FS_MIN_EXTENT_LEN are not kept, and the cache is
 *    given up for the inode (FI_NO_EXTENT) once only small extents remain
 *    after a split.
 *  - EX_BLOCK_AGE: only when tei->last_blocks is non-zero.
 *
 * The inode is marked dirty when the largest read extent changed.
 * Nothing is done if the inode has no tree of this type.
 */
static void __update_extent_tree_range(struct inode *inode,
			struct extent_info *tei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en = NULL, *en1 = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	/* prev is only initialized and used for EX_READ */
	struct extent_info ei, dei, prev;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int fofs = tei->fofs, len = tei->len;
	/* first file offset behind the updated range */
	unsigned int end = fofs + len;
	bool updated = false;
	bool leftmost = false;

	if (!et)
		return;

	if (type == EX_READ)
		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
						tei->blk, 0);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
						tei->age, tei->last_blocks);

	write_lock(&et->lock);

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
			write_unlock(&et->lock);
			return;
		}

		/* remember the largest extent as it was before this update */
		prev = et->largest;
		/* stays 0 unless the loop below touches an existing extent */
		dei.len = 0;

		/*
		 * drop largest extent before lookup, in case it's already
		 * been shrunk from extent tree
		 */
		__drop_largest_extent(et, fofs, len);
	}

	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (!en)
		en = next_en;

	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
	while (en && en->ei.fofs < end) {
		unsigned int org_end;
		int parts = 0;	/* # of parts current extent split into */

		next_en = en1 = NULL;

		/* snapshot of the extent before it gets modified */
		dei = en->ei;
		org_end = dei.fofs + dei.len;
		f2fs_bug_on(sbi, fofs >= org_end);

		/* keep the part in front of the range by shrinking en */
		if (fofs > dei.fofs && (type != EX_READ ||
				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
			en->ei.len = fofs - en->ei.fofs;
			prev_en = en;
			parts = 1;
		}

		/* keep the part behind the range */
		if (end < org_end && (type != EX_READ ||
				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
			if (parts) {
				/* en holds the front part: add a new node */
				__set_extent_info(&ei,
					end, org_end - end,
					end - dei.fofs + dei.blk, false,
					dei.age, dei.last_blocks,
					type);
				en1 = __insert_extent_tree(sbi, et, &ei,
							NULL, NULL, true);
				next_en = en1;
			} else {
				/* reuse en itself for the tail part */
				__set_extent_info(&en->ei,
					end, en->ei.len - (end - dei.fofs),
					en->ei.blk + (end - dei.fofs), true,
					dei.age, dei.last_blocks,
					type);
				next_en = en;
			}
			parts++;
		}

		if (!next_en) {
			struct rb_node *node = rb_next(&en->rb_node);

			next_en = rb_entry_safe(node, struct extent_node,
						rb_node);
		}

		/* nothing of the extent survived: remove the node */
		if (parts)
			__try_update_largest_extent(et, en);
		else
			__release_extent_node(sbi, et, en);

		/*
		 * if original extent is split into zero or two parts, extent
		 * tree has been altered by deletion or insertion, therefore
		 * invalidate pointers regard to tree.
		 */
		if (parts != 1) {
			insert_p = NULL;
			insert_parent = NULL;
		}
		en = next_en;
	}

	if (type == EX_BLOCK_AGE)
		goto update_age_extent_cache;

	/* 3. update extent in read extent cache */
	BUG_ON(type != EX_READ);

	if (tei->blk) {
		__set_extent_info(&ei, fofs, len, tei->blk, false,
				  0, 0, EX_READ);
		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
			__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);

		/* give up extent_cache, if split and small updates happen */
		if (dei.len >= 1 &&
				prev.len < F2FS_MIN_EXTENT_LEN &&
				et->largest.len < F2FS_MIN_EXTENT_LEN) {
			et->largest.len = 0;
			et->largest_updated = true;
			set_inode_flag(inode, FI_NO_EXTENT);
		}
	}

	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		__free_extent_tree(sbi, et);

	/* consume the flag; the inode is dirtied after the lock is dropped */
	if (et->largest_updated) {
		et->largest_updated = false;
		updated = true;
	}
	goto out_read_extent_cache;
update_age_extent_cache:
	/* no last_blocks value given: only the invalidation above applies */
	if (!tei->last_blocks)
		goto out_read_extent_cache;

	__set_extent_info(&ei, fofs, len, 0, false,
			tei->age, tei->last_blocks, EX_BLOCK_AGE);
	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);
out_read_extent_cache:
	write_unlock(&et->lock);

	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}
743 
#ifdef CONFIG_F2FS_FS_COMPRESSION
/*
 * Record a compressed range in the read extent cache of @inode.
 *
 * @fofs: first file offset of the range
 * @blkaddr: start block address of the range
 * @llen: length stored as the extent's len
 * @c_len: value stored as the extent's c_len
 *
 * Nothing is recorded when the inode has no read extent tree, when
 * FI_NO_EXTENT is set, or when an extent covering @fofs is already cached.
 * Otherwise the new extent is merged with its neighbours if possible and
 * inserted as a new node if not.
 */
void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
				pgoff_t fofs, block_t blkaddr, unsigned int llen,
				unsigned int c_len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
	struct extent_node *en = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	bool leftmost = false;

	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
						blkaddr, c_len);

	/*
	 * no read extent tree is attached to this inode; same guard as in
	 * the other lookup/update paths of this file
	 */
	if (!et)
		return;

	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return;

	write_lock(&et->lock);

	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	/* @fofs is already covered by a cached extent, keep it */
	if (en)
		goto unlock_out;

	/* keep_clen is set because c_len is assigned right below */
	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
	ei.c_len = c_len;

	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
				insert_p, insert_parent, leftmost);
unlock_out:
	write_unlock(&et->lock);
}
#endif
784 
785 static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
786 						unsigned long long new,
787 						unsigned long long old)
788 {
789 	unsigned int rem_old, rem_new;
790 	unsigned long long res;
791 	unsigned int weight = sbi->last_age_weight;
792 
793 	res = div_u64_rem(new, 100, &rem_new) * (100 - weight)
794 		+ div_u64_rem(old, 100, &rem_old) * weight;
795 
796 	if (rem_new)
797 		res += rem_new * (100 - weight) / 100;
798 	if (rem_old)
799 		res += rem_old * weight / 100;
800 
801 	return res;
802 }
803 
804 /* This returns a new age and allocated blocks in ei */
805 static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
806 						block_t blkaddr)
807 {
808 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
809 	loff_t f_size = i_size_read(inode);
810 	unsigned long long cur_blocks =
811 				atomic64_read(&sbi->allocated_data_blocks);
812 	struct extent_info tei = *ei;	/* only fofs and len are valid */
813 
814 	/*
815 	 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last
816 	 * file block even in seq write. So don't record age for newly last file
817 	 * block here.
818 	 */
819 	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
820 			blkaddr == NEW_ADDR)
821 		return -EINVAL;
822 
823 	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
824 		unsigned long long cur_age;
825 
826 		if (cur_blocks >= tei.last_blocks)
827 			cur_age = cur_blocks - tei.last_blocks;
828 		else
829 			/* allocated_data_blocks overflow */
830 			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;
831 
832 		if (tei.age)
833 			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
834 		else
835 			ei->age = cur_age;
836 		ei->last_blocks = cur_blocks;
837 		WARN_ON(ei->age > cur_blocks);
838 		return 0;
839 	}
840 
841 	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
842 
843 	/* the data block was allocated for the first time */
844 	if (blkaddr == NEW_ADDR)
845 		goto out;
846 
847 	if (__is_valid_data_blkaddr(blkaddr) &&
848 	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
849 		return -EINVAL;
850 out:
851 	/*
852 	 * init block age with zero, this can happen when the block age extent
853 	 * was reclaimed due to memory constraint or system reboot
854 	 */
855 	ei->age = 0;
856 	ei->last_blocks = cur_blocks;
857 	return 0;
858 }
859 
860 static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
861 {
862 	struct extent_info ei = {};
863 
864 	if (!__may_extent_tree(dn->inode, type))
865 		return;
866 
867 	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
868 								dn->ofs_in_node;
869 	ei.len = 1;
870 
871 	if (type == EX_READ) {
872 		if (dn->data_blkaddr == NEW_ADDR)
873 			ei.blk = NULL_ADDR;
874 		else
875 			ei.blk = dn->data_blkaddr;
876 	} else if (type == EX_BLOCK_AGE) {
877 		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
878 			return;
879 	}
880 	__update_extent_tree_range(dn->inode, &ei, type);
881 }
882 
/*
 * Reclaim up to @nr_shrink objects from the @type extent cache of @sbi.
 *
 * Phase 1 frees the trees on the zombie list together with their nodes.
 * Phase 2 frees extent nodes starting from the head of the global extent
 * list, whose tail is where nodes are added and moved on access.
 *
 * extent_tree_lock and the per-tree locks are only taken with trylock: if
 * extent_tree_lock is contended the function gives up, and a node whose
 * tree is busy is moved to the list tail and skipped.
 *
 * Returns the number of extent nodes plus extent trees that were freed.
 */
static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
					enum extent_type type)
{
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et, *next;
	struct extent_node *en;
	unsigned int node_cnt = 0, tree_cnt = 0;
	int remained;

	/* no zombie trees: go straight to reclaiming single nodes */
	if (!atomic_read(&eti->total_zombie_tree))
		goto free_node;

	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	/* 1. remove unreferenced extent tree */
	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
		if (atomic_read(&et->node_cnt)) {
			write_lock(&et->lock);
			node_cnt += __free_extent_tree(sbi, et);
			write_unlock(&et->lock);
		}
		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
		list_del_init(&et->list);
		radix_tree_delete(&eti->extent_tree_root, et->ino);
		kmem_cache_free(extent_tree_slab, et);
		atomic_dec(&eti->total_ext_tree);
		atomic_dec(&eti->total_zombie_tree);
		tree_cnt++;

		/* target reached; extent_tree_lock is dropped at unlock_out */
		if (node_cnt + tree_cnt >= nr_shrink)
			goto unlock_out;
		cond_resched();
	}
	mutex_unlock(&eti->extent_tree_lock);

free_node:
	/* 2. remove LRU extent entries */
	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	remained = nr_shrink - (node_cnt + tree_cnt);

	spin_lock(&eti->extent_lock);
	for (; remained > 0; remained--) {
		if (list_empty(&eti->extent_list))
			break;
		en = list_first_entry(&eti->extent_list,
					struct extent_node, list);
		et = en->et;
		if (!write_trylock(&et->lock)) {
			/* refresh this extent node's position in extent list */
			list_move_tail(&en->list, &eti->extent_list);
			continue;
		}

		list_del_init(&en->list);
		/* extent_lock is released while the node is detached and freed */
		spin_unlock(&eti->extent_lock);

		__detach_extent_node(sbi, et, en);

		write_unlock(&et->lock);
		node_cnt++;
		spin_lock(&eti->extent_lock);
	}
	spin_unlock(&eti->extent_lock);

unlock_out:
	mutex_unlock(&eti->extent_tree_lock);
out:
	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);

	return node_cnt + tree_cnt;
}
957 
958 /* read extent cache operations */
959 bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
960 				struct extent_info *ei)
961 {
962 	if (!__may_extent_tree(inode, EX_READ))
963 		return false;
964 
965 	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
966 }
967 
968 bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
969 				block_t *blkaddr)
970 {
971 	struct extent_info ei = {};
972 
973 	if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
974 		return false;
975 	*blkaddr = ei.blk + index - ei.fofs;
976 	return true;
977 }
978 
979 void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
980 {
981 	return __update_extent_cache(dn, EX_READ);
982 }
983 
984 void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
985 				pgoff_t fofs, block_t blkaddr, unsigned int len)
986 {
987 	struct extent_info ei = {
988 		.fofs = fofs,
989 		.len = len,
990 		.blk = blkaddr,
991 	};
992 
993 	if (!__may_extent_tree(dn->inode, EX_READ))
994 		return;
995 
996 	__update_extent_tree_range(dn->inode, &ei, EX_READ);
997 }
998 
999 unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1000 {
1001 	if (!test_opt(sbi, READ_EXTENT_CACHE))
1002 		return 0;
1003 
1004 	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
1005 }
1006 
1007 /* block age extent cache operations */
1008 bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
1009 				struct extent_info *ei)
1010 {
1011 	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
1012 		return false;
1013 
1014 	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
1015 }
1016 
1017 void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
1018 {
1019 	return __update_extent_cache(dn, EX_BLOCK_AGE);
1020 }
1021 
1022 void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
1023 				pgoff_t fofs, unsigned int len)
1024 {
1025 	struct extent_info ei = {
1026 		.fofs = fofs,
1027 		.len = len,
1028 	};
1029 
1030 	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
1031 		return;
1032 
1033 	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
1034 }
1035 
1036 unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1037 {
1038 	if (!test_opt(sbi, AGE_EXTENT_CACHE))
1039 		return 0;
1040 
1041 	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
1042 }
1043 
1044 static unsigned int __destroy_extent_node(struct inode *inode,
1045 					enum extent_type type)
1046 {
1047 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1048 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1049 	unsigned int node_cnt = 0;
1050 
1051 	if (!et || !atomic_read(&et->node_cnt))
1052 		return 0;
1053 
1054 	write_lock(&et->lock);
1055 	node_cnt = __free_extent_tree(sbi, et);
1056 	write_unlock(&et->lock);
1057 
1058 	return node_cnt;
1059 }
1060 
1061 void f2fs_destroy_extent_node(struct inode *inode)
1062 {
1063 	__destroy_extent_node(inode, EX_READ);
1064 	__destroy_extent_node(inode, EX_BLOCK_AGE);
1065 }
1066 
1067 static void __drop_extent_tree(struct inode *inode, enum extent_type type)
1068 {
1069 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1070 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1071 	bool updated = false;
1072 
1073 	if (!__may_extent_tree(inode, type))
1074 		return;
1075 
1076 	write_lock(&et->lock);
1077 	__free_extent_tree(sbi, et);
1078 	if (type == EX_READ) {
1079 		set_inode_flag(inode, FI_NO_EXTENT);
1080 		if (et->largest.len) {
1081 			et->largest.len = 0;
1082 			updated = true;
1083 		}
1084 	}
1085 	write_unlock(&et->lock);
1086 	if (updated)
1087 		f2fs_mark_inode_dirty_sync(inode, true);
1088 }
1089 
1090 void f2fs_drop_extent_tree(struct inode *inode)
1091 {
1092 	__drop_extent_tree(inode, EX_READ);
1093 	__drop_extent_tree(inode, EX_BLOCK_AGE);
1094 }
1095 
1096 static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
1097 {
1098 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1099 	struct extent_tree_info *eti = &sbi->extent_tree[type];
1100 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1101 	unsigned int node_cnt = 0;
1102 
1103 	if (!et)
1104 		return;
1105 
1106 	if (inode->i_nlink && !is_bad_inode(inode) &&
1107 					atomic_read(&et->node_cnt)) {
1108 		mutex_lock(&eti->extent_tree_lock);
1109 		list_add_tail(&et->list, &eti->zombie_list);
1110 		atomic_inc(&eti->total_zombie_tree);
1111 		mutex_unlock(&eti->extent_tree_lock);
1112 		return;
1113 	}
1114 
1115 	/* free all extent info belong to this extent tree */
1116 	node_cnt = __destroy_extent_node(inode, type);
1117 
1118 	/* delete extent tree entry in radix tree */
1119 	mutex_lock(&eti->extent_tree_lock);
1120 	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
1121 	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
1122 	kmem_cache_free(extent_tree_slab, et);
1123 	atomic_dec(&eti->total_ext_tree);
1124 	mutex_unlock(&eti->extent_tree_lock);
1125 
1126 	F2FS_I(inode)->extent_tree[type] = NULL;
1127 
1128 	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
1129 }
1130 
1131 void f2fs_destroy_extent_tree(struct inode *inode)
1132 {
1133 	__destroy_extent_tree(inode, EX_READ);
1134 	__destroy_extent_tree(inode, EX_BLOCK_AGE);
1135 }
1136 
1137 static void __init_extent_tree_info(struct extent_tree_info *eti)
1138 {
1139 	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
1140 	mutex_init(&eti->extent_tree_lock);
1141 	INIT_LIST_HEAD(&eti->extent_list);
1142 	spin_lock_init(&eti->extent_lock);
1143 	atomic_set(&eti->total_ext_tree, 0);
1144 	INIT_LIST_HEAD(&eti->zombie_list);
1145 	atomic_set(&eti->total_zombie_tree, 0);
1146 	atomic_set(&eti->total_ext_node, 0);
1147 }
1148 
1149 void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
1150 {
1151 	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
1152 	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);
1153 
1154 	/* initialize for block age extents */
1155 	atomic64_set(&sbi->allocated_data_blocks, 0);
1156 	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
1157 	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
1158 	sbi->last_age_weight = LAST_AGE_WEIGHT;
1159 }
1160 
1161 int __init f2fs_create_extent_cache(void)
1162 {
1163 	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
1164 			sizeof(struct extent_tree));
1165 	if (!extent_tree_slab)
1166 		return -ENOMEM;
1167 	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
1168 			sizeof(struct extent_node));
1169 	if (!extent_node_slab) {
1170 		kmem_cache_destroy(extent_tree_slab);
1171 		return -ENOMEM;
1172 	}
1173 	return 0;
1174 }
1175 
/*
 * Destroy the slab caches set up by f2fs_create_extent_cache(), in the
 * reverse order of their creation.
 */
void f2fs_destroy_extent_cache(void)
{
	kmem_cache_destroy(extent_node_slab);
	kmem_cache_destroy(extent_tree_slab);
}
1181