xref: /linux/fs/ext4/extents_status.c (revision 0ea5c948cb64bab5bc7a5516774eb8536f05aa0d)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
2654598beSZheng Liu /*
3654598beSZheng Liu  *  fs/ext4/extents_status.c
4654598beSZheng Liu  *
5654598beSZheng Liu  * Written by Yongqiang Yang <xiaoqiangnk@gmail.com>
6654598beSZheng Liu  * Modified by
7654598beSZheng Liu  *	Allison Henderson <achender@linux.vnet.ibm.com>
8654598beSZheng Liu  *	Hugh Dickins <hughd@google.com>
9654598beSZheng Liu  *	Zheng Liu <wenqing.lz@taobao.com>
10654598beSZheng Liu  *
11654598beSZheng Liu  * Ext4 extents status tree core functions.
12654598beSZheng Liu  */
13d3922a77SZheng Liu #include <linux/list_sort.h>
14eb68d0e2SZheng Liu #include <linux/proc_fs.h>
15eb68d0e2SZheng Liu #include <linux/seq_file.h>
16654598beSZheng Liu #include "ext4.h"
17654598beSZheng Liu 
18992e9fddSZheng Liu #include <trace/events/ext4.h>
19992e9fddSZheng Liu 
20654598beSZheng Liu /*
21654598beSZheng Liu  * According to previous discussion in Ext4 Developer Workshop, we
22654598beSZheng Liu  * will introduce a new structure called io tree to track all extent
23654598beSZheng Liu  * status in order to solve some problems that we have met
24654598beSZheng Liu  * (e.g. Reservation space warning), and provide extent-level locking.
25654598beSZheng Liu  * Delay extent tree is the first step to achieve this goal.  It is
26654598beSZheng Liu  * original built by Yongqiang Yang.  At that time it is called delay
2706b0c886SZheng Liu  * extent tree, whose goal is only track delayed extents in memory to
28654598beSZheng Liu  * simplify the implementation of fiemap and bigalloc, and introduce
29654598beSZheng Liu  * lseek SEEK_DATA/SEEK_HOLE support.  That is why it is still called
3006b0c886SZheng Liu  * delay extent tree at the first commit.  But for better understand
3106b0c886SZheng Liu  * what it does, it has been rename to extent status tree.
32654598beSZheng Liu  *
3306b0c886SZheng Liu  * Step1:
3406b0c886SZheng Liu  * Currently the first step has been done.  All delayed extents are
3506b0c886SZheng Liu  * tracked in the tree.  It maintains the delayed extent when a delayed
3606b0c886SZheng Liu  * allocation is issued, and the delayed extent is written out or
37654598beSZheng Liu  * invalidated.  Therefore the implementation of fiemap and bigalloc
38654598beSZheng Liu  * are simplified, and SEEK_DATA/SEEK_HOLE are introduced.
39654598beSZheng Liu  *
40654598beSZheng Liu  * The following comment describes the implemenmtation of extent
41654598beSZheng Liu  * status tree and future works.
4206b0c886SZheng Liu  *
4306b0c886SZheng Liu  * Step2:
4406b0c886SZheng Liu  * In this step all extent status are tracked by extent status tree.
4506b0c886SZheng Liu  * Thus, we can first try to lookup a block mapping in this tree before
4606b0c886SZheng Liu  * finding it in extent tree.  Hence, single extent cache can be removed
4706b0c886SZheng Liu  * because extent status tree can do a better job.  Extents in status
4806b0c886SZheng Liu  * tree are loaded on-demand.  Therefore, the extent status tree may not
4906b0c886SZheng Liu  * contain all of the extents in a file.  Meanwhile we define a shrinker
5006b0c886SZheng Liu  * to reclaim memory from extent status tree because fragmented extent
5106b0c886SZheng Liu  * tree will make status tree cost too much memory.  written/unwritten/-
5206b0c886SZheng Liu  * hole extents in the tree will be reclaimed by this shrinker when we
5306b0c886SZheng Liu  * are under high memory pressure.  Delayed extents will not be
5406b0c886SZheng Liu  * reclimed because fiemap, bigalloc, and seek_data/hole need it.
55654598beSZheng Liu  */
56654598beSZheng Liu 
57654598beSZheng Liu /*
5806b0c886SZheng Liu  * Extent status tree implementation for ext4.
59654598beSZheng Liu  *
60654598beSZheng Liu  *
61654598beSZheng Liu  * ==========================================================================
6206b0c886SZheng Liu  * Extent status tree tracks all extent status.
63654598beSZheng Liu  *
6406b0c886SZheng Liu  * 1. Why we need to implement extent status tree?
65654598beSZheng Liu  *
6606b0c886SZheng Liu  * Without extent status tree, ext4 identifies a delayed extent by looking
67654598beSZheng Liu  * up page cache, this has several deficiencies - complicated, buggy,
68654598beSZheng Liu  * and inefficient code.
69654598beSZheng Liu  *
7006b0c886SZheng Liu  * FIEMAP, SEEK_HOLE/DATA, bigalloc, and writeout all need to know if a
7106b0c886SZheng Liu  * block or a range of blocks are belonged to a delayed extent.
72654598beSZheng Liu  *
7306b0c886SZheng Liu  * Let us have a look at how they do without extent status tree.
74654598beSZheng Liu  *   --	FIEMAP
75654598beSZheng Liu  *	FIEMAP looks up page cache to identify delayed allocations from holes.
76654598beSZheng Liu  *
77654598beSZheng Liu  *   --	SEEK_HOLE/DATA
78654598beSZheng Liu  *	SEEK_HOLE/DATA has the same problem as FIEMAP.
79654598beSZheng Liu  *
80654598beSZheng Liu  *   --	bigalloc
81654598beSZheng Liu  *	bigalloc looks up page cache to figure out if a block is
82654598beSZheng Liu  *	already under delayed allocation or not to determine whether
83654598beSZheng Liu  *	quota reserving is needed for the cluster.
84654598beSZheng Liu  *
85654598beSZheng Liu  *   --	writeout
86654598beSZheng Liu  *	Writeout looks up whole page cache to see if a buffer is
87654598beSZheng Liu  *	mapped, If there are not very many delayed buffers, then it is
883f8b6fb7SMasahiro Yamada  *	time consuming.
89654598beSZheng Liu  *
9006b0c886SZheng Liu  * With extent status tree implementation, FIEMAP, SEEK_HOLE/DATA,
91654598beSZheng Liu  * bigalloc and writeout can figure out if a block or a range of
92654598beSZheng Liu  * blocks is under delayed allocation(belonged to a delayed extent) or
9306b0c886SZheng Liu  * not by searching the extent tree.
94654598beSZheng Liu  *
95654598beSZheng Liu  *
96654598beSZheng Liu  * ==========================================================================
9706b0c886SZheng Liu  * 2. Ext4 extent status tree impelmentation
98654598beSZheng Liu  *
9906b0c886SZheng Liu  *   --	extent
10006b0c886SZheng Liu  *	A extent is a range of blocks which are contiguous logically and
10106b0c886SZheng Liu  *	physically.  Unlike extent in extent tree, this extent in ext4 is
10206b0c886SZheng Liu  *	a in-memory struct, there is no corresponding on-disk data.  There
10306b0c886SZheng Liu  *	is no limit on length of extent, so an extent can contain as many
10406b0c886SZheng Liu  *	blocks as they are contiguous logically and physically.
105654598beSZheng Liu  *
10606b0c886SZheng Liu  *   --	extent status tree
10706b0c886SZheng Liu  *	Every inode has an extent status tree and all allocation blocks
10806b0c886SZheng Liu  *	are added to the tree with different status.  The extent in the
10906b0c886SZheng Liu  *	tree are ordered by logical block no.
110654598beSZheng Liu  *
11106b0c886SZheng Liu  *   --	operations on a extent status tree
11206b0c886SZheng Liu  *	There are three important operations on a delayed extent tree: find
11306b0c886SZheng Liu  *	next extent, adding a extent(a range of blocks) and removing a extent.
114654598beSZheng Liu  *
11506b0c886SZheng Liu  *   --	race on a extent status tree
11606b0c886SZheng Liu  *	Extent status tree is protected by inode->i_es_lock.
11706b0c886SZheng Liu  *
11806b0c886SZheng Liu  *   --	memory consumption
11906b0c886SZheng Liu  *      Fragmented extent tree will make extent status tree cost too much
12006b0c886SZheng Liu  *      memory.  Hence, we will reclaim written/unwritten/hole extents from
12106b0c886SZheng Liu  *      the tree under a heavy memory pressure.
122654598beSZheng Liu  *
123654598beSZheng Liu  *
124654598beSZheng Liu  * ==========================================================================
12506b0c886SZheng Liu  * 3. Performance analysis
12606b0c886SZheng Liu  *
127654598beSZheng Liu  *   --	overhead
128654598beSZheng Liu  *	1. There is a cache extent for write access, so if writes are
129654598beSZheng Liu  *	not very random, adding space operaions are in O(1) time.
130654598beSZheng Liu  *
131654598beSZheng Liu  *   --	gain
132654598beSZheng Liu  *	2. Code is much simpler, more readable, more maintainable and
133654598beSZheng Liu  *	more efficient.
134654598beSZheng Liu  *
135654598beSZheng Liu  *
136654598beSZheng Liu  * ==========================================================================
137654598beSZheng Liu  * 4. TODO list
138654598beSZheng Liu  *
13906b0c886SZheng Liu  *   -- Refactor delayed space reservation
140654598beSZheng Liu  *
141654598beSZheng Liu  *   -- Extent-level locking
142654598beSZheng Liu  */
143654598beSZheng Liu 
144654598beSZheng Liu static struct kmem_cache *ext4_es_cachep;
1451dc0aa46SEric Whitney static struct kmem_cache *ext4_pending_cachep;
146654598beSZheng Liu 
14795f0b320SBaokun Li static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
14895f0b320SBaokun Li 			      struct extent_status *prealloc);
149bdedbb7bSZheng Liu static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
150bda3efafSBaokun Li 			      ext4_lblk_t end, int *reserved,
151bda3efafSBaokun Li 			      struct extent_status *prealloc);
152dd475925SJan Kara static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
153edaa53caSZheng Liu static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
154e15f742cSTheodore Ts'o 		       struct ext4_inode_info *locked_ei);
155*8e387c89SZhang Yi static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
156*8e387c89SZhang Yi 			    ext4_lblk_t len,
157*8e387c89SZhang Yi 			    struct pending_reservation **prealloc);
15806b0c886SZheng Liu 
ext4_init_es(void)159654598beSZheng Liu int __init ext4_init_es(void)
160654598beSZheng Liu {
161060f7739SJunChao Sun 	ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
162654598beSZheng Liu 	if (ext4_es_cachep == NULL)
163654598beSZheng Liu 		return -ENOMEM;
164654598beSZheng Liu 	return 0;
165654598beSZheng Liu }
166654598beSZheng Liu 
ext4_exit_es(void)167654598beSZheng Liu void ext4_exit_es(void)
168654598beSZheng Liu {
169654598beSZheng Liu 	kmem_cache_destroy(ext4_es_cachep);
170654598beSZheng Liu }
171654598beSZheng Liu 
ext4_es_init_tree(struct ext4_es_tree * tree)172654598beSZheng Liu void ext4_es_init_tree(struct ext4_es_tree *tree)
173654598beSZheng Liu {
174654598beSZheng Liu 	tree->root = RB_ROOT;
175654598beSZheng Liu 	tree->cache_es = NULL;
176654598beSZheng Liu }
177654598beSZheng Liu 
178654598beSZheng Liu #ifdef ES_DEBUG__
ext4_es_print_tree(struct inode * inode)179654598beSZheng Liu static void ext4_es_print_tree(struct inode *inode)
180654598beSZheng Liu {
181654598beSZheng Liu 	struct ext4_es_tree *tree;
182654598beSZheng Liu 	struct rb_node *node;
183654598beSZheng Liu 
184654598beSZheng Liu 	printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino);
185654598beSZheng Liu 	tree = &EXT4_I(inode)->i_es_tree;
186654598beSZheng Liu 	node = rb_first(&tree->root);
187654598beSZheng Liu 	while (node) {
188654598beSZheng Liu 		struct extent_status *es;
189654598beSZheng Liu 		es = rb_entry(node, struct extent_status, rb_node);
190ce140cddSEric Whitney 		printk(KERN_DEBUG " [%u/%u) %llu %x",
191fdc0212eSZheng Liu 		       es->es_lblk, es->es_len,
192fdc0212eSZheng Liu 		       ext4_es_pblock(es), ext4_es_status(es));
193654598beSZheng Liu 		node = rb_next(node);
194654598beSZheng Liu 	}
195654598beSZheng Liu 	printk(KERN_DEBUG "\n");
196654598beSZheng Liu }
197654598beSZheng Liu #else
198654598beSZheng Liu #define ext4_es_print_tree(inode)
199654598beSZheng Liu #endif
200654598beSZheng Liu 
ext4_es_end(struct extent_status * es)20106b0c886SZheng Liu static inline ext4_lblk_t ext4_es_end(struct extent_status *es)
202654598beSZheng Liu {
20306b0c886SZheng Liu 	BUG_ON(es->es_lblk + es->es_len < es->es_lblk);
20406b0c886SZheng Liu 	return es->es_lblk + es->es_len - 1;
205654598beSZheng Liu }
206654598beSZheng Liu 
207654598beSZheng Liu /*
208654598beSZheng Liu  * search through the tree for an delayed extent with a given offset.  If
209654598beSZheng Liu  * it can't be found, try to find next extent.
210654598beSZheng Liu  */
__es_tree_search(struct rb_root * root,ext4_lblk_t lblk)211654598beSZheng Liu static struct extent_status *__es_tree_search(struct rb_root *root,
21206b0c886SZheng Liu 					      ext4_lblk_t lblk)
213654598beSZheng Liu {
214654598beSZheng Liu 	struct rb_node *node = root->rb_node;
215654598beSZheng Liu 	struct extent_status *es = NULL;
216654598beSZheng Liu 
217654598beSZheng Liu 	while (node) {
218654598beSZheng Liu 		es = rb_entry(node, struct extent_status, rb_node);
21906b0c886SZheng Liu 		if (lblk < es->es_lblk)
220654598beSZheng Liu 			node = node->rb_left;
22106b0c886SZheng Liu 		else if (lblk > ext4_es_end(es))
222654598beSZheng Liu 			node = node->rb_right;
223654598beSZheng Liu 		else
224654598beSZheng Liu 			return es;
225654598beSZheng Liu 	}
226654598beSZheng Liu 
22706b0c886SZheng Liu 	if (es && lblk < es->es_lblk)
228654598beSZheng Liu 		return es;
229654598beSZheng Liu 
23006b0c886SZheng Liu 	if (es && lblk > ext4_es_end(es)) {
231654598beSZheng Liu 		node = rb_next(&es->rb_node);
232654598beSZheng Liu 		return node ? rb_entry(node, struct extent_status, rb_node) :
233654598beSZheng Liu 			      NULL;
234654598beSZheng Liu 	}
235654598beSZheng Liu 
236654598beSZheng Liu 	return NULL;
237654598beSZheng Liu }
238654598beSZheng Liu 
239654598beSZheng Liu /*
240ad431025SEric Whitney  * ext4_es_find_extent_range - find extent with specified status within block
241ad431025SEric Whitney  *                             range or next extent following block range in
242ad431025SEric Whitney  *                             extents status tree
243654598beSZheng Liu  *
244ad431025SEric Whitney  * @inode - file containing the range
245ad431025SEric Whitney  * @matching_fn - pointer to function that matches extents with desired status
246ad431025SEric Whitney  * @lblk - logical block defining start of range
247ad431025SEric Whitney  * @end - logical block defining end of range
248ad431025SEric Whitney  * @es - extent found, if any
249ad431025SEric Whitney  *
250ad431025SEric Whitney  * Find the first extent within the block range specified by @lblk and @end
251ad431025SEric Whitney  * in the extents status tree that satisfies @matching_fn.  If a match
252ad431025SEric Whitney  * is found, it's returned in @es.  If not, and a matching extent is found
253ad431025SEric Whitney  * beyond the block range, it's returned in @es.  If no match is found, an
254ad431025SEric Whitney  * extent is returned in @es whose es_lblk, es_len, and es_pblk components
255ad431025SEric Whitney  * are 0.
256654598beSZheng Liu  */
__es_find_extent_range(struct inode * inode,int (* matching_fn)(struct extent_status * es),ext4_lblk_t lblk,ext4_lblk_t end,struct extent_status * es)257ad431025SEric Whitney static void __es_find_extent_range(struct inode *inode,
258ad431025SEric Whitney 				   int (*matching_fn)(struct extent_status *es),
259e30b5dcaSYan, Zheng 				   ext4_lblk_t lblk, ext4_lblk_t end,
260be401363SZheng Liu 				   struct extent_status *es)
261654598beSZheng Liu {
262654598beSZheng Liu 	struct ext4_es_tree *tree = NULL;
263654598beSZheng Liu 	struct extent_status *es1 = NULL;
264654598beSZheng Liu 	struct rb_node *node;
265654598beSZheng Liu 
266ad431025SEric Whitney 	WARN_ON(es == NULL);
267ad431025SEric Whitney 	WARN_ON(end < lblk);
268992e9fddSZheng Liu 
269654598beSZheng Liu 	tree = &EXT4_I(inode)->i_es_tree;
270654598beSZheng Liu 
271ad431025SEric Whitney 	/* see if the extent has been cached */
272be401363SZheng Liu 	es->es_lblk = es->es_len = es->es_pblk = 0;
273492888dfSJan Kara 	es1 = READ_ONCE(tree->cache_es);
274492888dfSJan Kara 	if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) {
2753be78c73STheodore Ts'o 		es_debug("%u cached by [%u/%u) %llu %x\n",
276be401363SZheng Liu 			 lblk, es1->es_lblk, es1->es_len,
277fdc0212eSZheng Liu 			 ext4_es_pblock(es1), ext4_es_status(es1));
278654598beSZheng Liu 		goto out;
279654598beSZheng Liu 	}
280654598beSZheng Liu 
281be401363SZheng Liu 	es1 = __es_tree_search(&tree->root, lblk);
282654598beSZheng Liu 
283654598beSZheng Liu out:
284ad431025SEric Whitney 	if (es1 && !matching_fn(es1)) {
285be401363SZheng Liu 		while ((node = rb_next(&es1->rb_node)) != NULL) {
286be401363SZheng Liu 			es1 = rb_entry(node, struct extent_status, rb_node);
287e30b5dcaSYan, Zheng 			if (es1->es_lblk > end) {
288e30b5dcaSYan, Zheng 				es1 = NULL;
289e30b5dcaSYan, Zheng 				break;
290e30b5dcaSYan, Zheng 			}
291ad431025SEric Whitney 			if (matching_fn(es1))
292be401363SZheng Liu 				break;
293be401363SZheng Liu 		}
294be401363SZheng Liu 	}
295be401363SZheng Liu 
296ad431025SEric Whitney 	if (es1 && matching_fn(es1)) {
297492888dfSJan Kara 		WRITE_ONCE(tree->cache_es, es1);
29806b0c886SZheng Liu 		es->es_lblk = es1->es_lblk;
29906b0c886SZheng Liu 		es->es_len = es1->es_len;
300fdc0212eSZheng Liu 		es->es_pblk = es1->es_pblk;
301654598beSZheng Liu 	}
302654598beSZheng Liu 
303ad431025SEric Whitney }
304ad431025SEric Whitney 
305ad431025SEric Whitney /*
306ad431025SEric Whitney  * Locking for __es_find_extent_range() for external use
307ad431025SEric Whitney  */
ext4_es_find_extent_range(struct inode * inode,int (* matching_fn)(struct extent_status * es),ext4_lblk_t lblk,ext4_lblk_t end,struct extent_status * es)308ad431025SEric Whitney void ext4_es_find_extent_range(struct inode *inode,
309ad431025SEric Whitney 			       int (*matching_fn)(struct extent_status *es),
310ad431025SEric Whitney 			       ext4_lblk_t lblk, ext4_lblk_t end,
311ad431025SEric Whitney 			       struct extent_status *es)
312ad431025SEric Whitney {
3138016e29fSHarshad Shirwadkar 	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
3148016e29fSHarshad Shirwadkar 		return;
3158016e29fSHarshad Shirwadkar 
316ad431025SEric Whitney 	trace_ext4_es_find_extent_range_enter(inode, lblk);
317ad431025SEric Whitney 
318ad431025SEric Whitney 	read_lock(&EXT4_I(inode)->i_es_lock);
319ad431025SEric Whitney 	__es_find_extent_range(inode, matching_fn, lblk, end, es);
320654598beSZheng Liu 	read_unlock(&EXT4_I(inode)->i_es_lock);
321992e9fddSZheng Liu 
322ad431025SEric Whitney 	trace_ext4_es_find_extent_range_exit(inode, es);
323ad431025SEric Whitney }
324ad431025SEric Whitney 
325ad431025SEric Whitney /*
326ad431025SEric Whitney  * __es_scan_range - search block range for block with specified status
327ad431025SEric Whitney  *                   in extents status tree
328ad431025SEric Whitney  *
329ad431025SEric Whitney  * @inode - file containing the range
330ad431025SEric Whitney  * @matching_fn - pointer to function that matches extents with desired status
331ad431025SEric Whitney  * @lblk - logical block defining start of range
332ad431025SEric Whitney  * @end - logical block defining end of range
333ad431025SEric Whitney  *
334ad431025SEric Whitney  * Returns true if at least one block in the specified block range satisfies
335ad431025SEric Whitney  * the criterion specified by @matching_fn, and false if not.  If at least
336ad431025SEric Whitney  * one extent has the specified status, then there is at least one block
337ad431025SEric Whitney  * in the cluster with that status.  Should only be called by code that has
338ad431025SEric Whitney  * taken i_es_lock.
339ad431025SEric Whitney  */
__es_scan_range(struct inode * inode,int (* matching_fn)(struct extent_status * es),ext4_lblk_t start,ext4_lblk_t end)340ad431025SEric Whitney static bool __es_scan_range(struct inode *inode,
341ad431025SEric Whitney 			    int (*matching_fn)(struct extent_status *es),
342ad431025SEric Whitney 			    ext4_lblk_t start, ext4_lblk_t end)
343ad431025SEric Whitney {
344ad431025SEric Whitney 	struct extent_status es;
345ad431025SEric Whitney 
346ad431025SEric Whitney 	__es_find_extent_range(inode, matching_fn, start, end, &es);
347ad431025SEric Whitney 	if (es.es_len == 0)
348ad431025SEric Whitney 		return false;   /* no matching extent in the tree */
349ad431025SEric Whitney 	else if (es.es_lblk <= start &&
350ad431025SEric Whitney 		 start < es.es_lblk + es.es_len)
351ad431025SEric Whitney 		return true;
352ad431025SEric Whitney 	else if (start <= es.es_lblk && es.es_lblk <= end)
353ad431025SEric Whitney 		return true;
354ad431025SEric Whitney 	else
355ad431025SEric Whitney 		return false;
356ad431025SEric Whitney }
357ad431025SEric Whitney /*
358ad431025SEric Whitney  * Locking for __es_scan_range() for external use
359ad431025SEric Whitney  */
ext4_es_scan_range(struct inode * inode,int (* matching_fn)(struct extent_status * es),ext4_lblk_t lblk,ext4_lblk_t end)360ad431025SEric Whitney bool ext4_es_scan_range(struct inode *inode,
361ad431025SEric Whitney 			int (*matching_fn)(struct extent_status *es),
362ad431025SEric Whitney 			ext4_lblk_t lblk, ext4_lblk_t end)
363ad431025SEric Whitney {
364ad431025SEric Whitney 	bool ret;
365ad431025SEric Whitney 
3668016e29fSHarshad Shirwadkar 	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
3678016e29fSHarshad Shirwadkar 		return false;
3688016e29fSHarshad Shirwadkar 
369ad431025SEric Whitney 	read_lock(&EXT4_I(inode)->i_es_lock);
370ad431025SEric Whitney 	ret = __es_scan_range(inode, matching_fn, lblk, end);
371ad431025SEric Whitney 	read_unlock(&EXT4_I(inode)->i_es_lock);
372ad431025SEric Whitney 
373ad431025SEric Whitney 	return ret;
374ad431025SEric Whitney }
375ad431025SEric Whitney 
376ad431025SEric Whitney /*
377ad431025SEric Whitney  * __es_scan_clu - search cluster for block with specified status in
378ad431025SEric Whitney  *                 extents status tree
379ad431025SEric Whitney  *
380ad431025SEric Whitney  * @inode - file containing the cluster
381ad431025SEric Whitney  * @matching_fn - pointer to function that matches extents with desired status
382ad431025SEric Whitney  * @lblk - logical block in cluster to be searched
383ad431025SEric Whitney  *
384ad431025SEric Whitney  * Returns true if at least one extent in the cluster containing @lblk
385ad431025SEric Whitney  * satisfies the criterion specified by @matching_fn, and false if not.  If at
386ad431025SEric Whitney  * least one extent has the specified status, then there is at least one block
387ad431025SEric Whitney  * in the cluster with that status.  Should only be called by code that has
388ad431025SEric Whitney  * taken i_es_lock.
389ad431025SEric Whitney  */
__es_scan_clu(struct inode * inode,int (* matching_fn)(struct extent_status * es),ext4_lblk_t lblk)390ad431025SEric Whitney static bool __es_scan_clu(struct inode *inode,
391ad431025SEric Whitney 			  int (*matching_fn)(struct extent_status *es),
392ad431025SEric Whitney 			  ext4_lblk_t lblk)
393ad431025SEric Whitney {
394ad431025SEric Whitney 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
395ad431025SEric Whitney 	ext4_lblk_t lblk_start, lblk_end;
396ad431025SEric Whitney 
397ad431025SEric Whitney 	lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
398ad431025SEric Whitney 	lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
399ad431025SEric Whitney 
400ad431025SEric Whitney 	return __es_scan_range(inode, matching_fn, lblk_start, lblk_end);
401ad431025SEric Whitney }
402ad431025SEric Whitney 
403ad431025SEric Whitney /*
404ad431025SEric Whitney  * Locking for __es_scan_clu() for external use
405ad431025SEric Whitney  */
ext4_es_scan_clu(struct inode * inode,int (* matching_fn)(struct extent_status * es),ext4_lblk_t lblk)406ad431025SEric Whitney bool ext4_es_scan_clu(struct inode *inode,
407ad431025SEric Whitney 		      int (*matching_fn)(struct extent_status *es),
408ad431025SEric Whitney 		      ext4_lblk_t lblk)
409ad431025SEric Whitney {
410ad431025SEric Whitney 	bool ret;
411ad431025SEric Whitney 
4128016e29fSHarshad Shirwadkar 	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
4138016e29fSHarshad Shirwadkar 		return false;
4148016e29fSHarshad Shirwadkar 
415ad431025SEric Whitney 	read_lock(&EXT4_I(inode)->i_es_lock);
416ad431025SEric Whitney 	ret = __es_scan_clu(inode, matching_fn, lblk);
417ad431025SEric Whitney 	read_unlock(&EXT4_I(inode)->i_es_lock);
418ad431025SEric Whitney 
419ad431025SEric Whitney 	return ret;
420654598beSZheng Liu }
421654598beSZheng Liu 
ext4_es_list_add(struct inode * inode)422b0dea4c1SJan Kara static void ext4_es_list_add(struct inode *inode)
423edaa53caSZheng Liu {
424edaa53caSZheng Liu 	struct ext4_inode_info *ei = EXT4_I(inode);
425edaa53caSZheng Liu 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
426edaa53caSZheng Liu 
427edaa53caSZheng Liu 	if (!list_empty(&ei->i_es_list))
428edaa53caSZheng Liu 		return;
429edaa53caSZheng Liu 
430edaa53caSZheng Liu 	spin_lock(&sbi->s_es_lock);
431edaa53caSZheng Liu 	if (list_empty(&ei->i_es_list)) {
432edaa53caSZheng Liu 		list_add_tail(&ei->i_es_list, &sbi->s_es_list);
433edaa53caSZheng Liu 		sbi->s_es_nr_inode++;
434edaa53caSZheng Liu 	}
435edaa53caSZheng Liu 	spin_unlock(&sbi->s_es_lock);
436edaa53caSZheng Liu }
437edaa53caSZheng Liu 
ext4_es_list_del(struct inode * inode)438b0dea4c1SJan Kara static void ext4_es_list_del(struct inode *inode)
439edaa53caSZheng Liu {
440edaa53caSZheng Liu 	struct ext4_inode_info *ei = EXT4_I(inode);
441edaa53caSZheng Liu 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
442edaa53caSZheng Liu 
443edaa53caSZheng Liu 	spin_lock(&sbi->s_es_lock);
444edaa53caSZheng Liu 	if (!list_empty(&ei->i_es_list)) {
445edaa53caSZheng Liu 		list_del_init(&ei->i_es_list);
446edaa53caSZheng Liu 		sbi->s_es_nr_inode--;
447edaa53caSZheng Liu 		WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
448edaa53caSZheng Liu 	}
449edaa53caSZheng Liu 	spin_unlock(&sbi->s_es_lock);
450edaa53caSZheng Liu }
451edaa53caSZheng Liu 
__alloc_pending(bool nofail)452*8e387c89SZhang Yi static inline struct pending_reservation *__alloc_pending(bool nofail)
453*8e387c89SZhang Yi {
454*8e387c89SZhang Yi 	if (!nofail)
455*8e387c89SZhang Yi 		return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
456*8e387c89SZhang Yi 
457*8e387c89SZhang Yi 	return kmem_cache_zalloc(ext4_pending_cachep, GFP_KERNEL | __GFP_NOFAIL);
458*8e387c89SZhang Yi }
459*8e387c89SZhang Yi 
__free_pending(struct pending_reservation * pr)460*8e387c89SZhang Yi static inline void __free_pending(struct pending_reservation *pr)
461*8e387c89SZhang Yi {
462*8e387c89SZhang Yi 	kmem_cache_free(ext4_pending_cachep, pr);
463*8e387c89SZhang Yi }
464*8e387c89SZhang Yi 
4659649eb18SBaokun Li /*
4669649eb18SBaokun Li  * Returns true if we cannot fail to allocate memory for this extent_status
4679649eb18SBaokun Li  * entry and cannot reclaim it until its status changes.
4689649eb18SBaokun Li  */
ext4_es_must_keep(struct extent_status * es)4699649eb18SBaokun Li static inline bool ext4_es_must_keep(struct extent_status *es)
4709649eb18SBaokun Li {
4719649eb18SBaokun Li 	/* fiemap, bigalloc, and seek_data/hole need to use it. */
4729649eb18SBaokun Li 	if (ext4_es_is_delayed(es))
4739649eb18SBaokun Li 		return true;
4749649eb18SBaokun Li 
4759649eb18SBaokun Li 	return false;
4769649eb18SBaokun Li }
4779649eb18SBaokun Li 
__es_alloc_extent(bool nofail)47873a2f033SBaokun Li static inline struct extent_status *__es_alloc_extent(bool nofail)
479654598beSZheng Liu {
48073a2f033SBaokun Li 	if (!nofail)
48173a2f033SBaokun Li 		return kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
48273a2f033SBaokun Li 
48373a2f033SBaokun Li 	return kmem_cache_zalloc(ext4_es_cachep, GFP_KERNEL | __GFP_NOFAIL);
48473a2f033SBaokun Li }
48573a2f033SBaokun Li 
ext4_es_init_extent(struct inode * inode,struct extent_status * es,ext4_lblk_t lblk,ext4_lblk_t len,ext4_fsblk_t pblk)48673a2f033SBaokun Li static void ext4_es_init_extent(struct inode *inode, struct extent_status *es,
48773a2f033SBaokun Li 		ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk)
48873a2f033SBaokun Li {
48906b0c886SZheng Liu 	es->es_lblk = lblk;
49006b0c886SZheng Liu 	es->es_len = len;
491fdc0212eSZheng Liu 	es->es_pblk = pblk;
49274cd15cdSZheng Liu 
4939649eb18SBaokun Li 	/* We never try to reclaim a must kept extent, so we don't count it. */
4949649eb18SBaokun Li 	if (!ext4_es_must_keep(es)) {
495b0dea4c1SJan Kara 		if (!EXT4_I(inode)->i_es_shk_nr++)
496b0dea4c1SJan Kara 			ext4_es_list_add(inode);
497eb68d0e2SZheng Liu 		percpu_counter_inc(&EXT4_SB(inode->i_sb)->
498edaa53caSZheng Liu 					s_es_stats.es_stats_shk_cnt);
49924630774STheodore Ts'o 	}
50074cd15cdSZheng Liu 
501eb68d0e2SZheng Liu 	EXT4_I(inode)->i_es_all_nr++;
502eb68d0e2SZheng Liu 	percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
50373a2f033SBaokun Li }
504eb68d0e2SZheng Liu 
__es_free_extent(struct extent_status * es)50573a2f033SBaokun Li static inline void __es_free_extent(struct extent_status *es)
50673a2f033SBaokun Li {
50773a2f033SBaokun Li 	kmem_cache_free(ext4_es_cachep, es);
508654598beSZheng Liu }
509654598beSZheng Liu 
ext4_es_free_extent(struct inode * inode,struct extent_status * es)510bdedbb7bSZheng Liu static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
511654598beSZheng Liu {
512eb68d0e2SZheng Liu 	EXT4_I(inode)->i_es_all_nr--;
513eb68d0e2SZheng Liu 	percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
514eb68d0e2SZheng Liu 
5159649eb18SBaokun Li 	/* Decrease the shrink counter when we can reclaim the extent. */
5169649eb18SBaokun Li 	if (!ext4_es_must_keep(es)) {
517edaa53caSZheng Liu 		BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
518b0dea4c1SJan Kara 		if (!--EXT4_I(inode)->i_es_shk_nr)
519b0dea4c1SJan Kara 			ext4_es_list_del(inode);
520eb68d0e2SZheng Liu 		percpu_counter_dec(&EXT4_SB(inode->i_sb)->
521edaa53caSZheng Liu 					s_es_stats.es_stats_shk_cnt);
52274cd15cdSZheng Liu 	}
52374cd15cdSZheng Liu 
52473a2f033SBaokun Li 	__es_free_extent(es);
525654598beSZheng Liu }
526654598beSZheng Liu 
52706b0c886SZheng Liu /*
52806b0c886SZheng Liu  * Check whether or not two extents can be merged
52906b0c886SZheng Liu  * Condition:
53006b0c886SZheng Liu  *  - logical block number is contiguous
531fdc0212eSZheng Liu  *  - physical block number is contiguous
532fdc0212eSZheng Liu  *  - status is equal
53306b0c886SZheng Liu  */
ext4_es_can_be_merged(struct extent_status * es1,struct extent_status * es2)53406b0c886SZheng Liu static int ext4_es_can_be_merged(struct extent_status *es1,
53506b0c886SZheng Liu 				 struct extent_status *es2)
53606b0c886SZheng Liu {
5372be12de9SJan Kara 	if (ext4_es_type(es1) != ext4_es_type(es2))
538fdc0212eSZheng Liu 		return 0;
539fdc0212eSZheng Liu 
5400baaea64SLukas Czerner 	if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
5410baaea64SLukas Czerner 		pr_warn("ES assertion failed when merging extents. "
5420baaea64SLukas Czerner 			"The sum of lengths of es1 (%d) and es2 (%d) "
5430baaea64SLukas Czerner 			"is bigger than allowed file size (%d)\n",
5440baaea64SLukas Czerner 			es1->es_len, es2->es_len, EXT_MAX_BLOCKS);
5450baaea64SLukas Czerner 		WARN_ON(1);
546fdc0212eSZheng Liu 		return 0;
5470baaea64SLukas Czerner 	}
548fdc0212eSZheng Liu 
549bd384364SZheng Liu 	if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
550bd384364SZheng Liu 		return 0;
551bd384364SZheng Liu 
552bd384364SZheng Liu 	if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
553bd384364SZheng Liu 	    (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
55406b0c886SZheng Liu 		return 1;
555bd384364SZheng Liu 
556bd384364SZheng Liu 	if (ext4_es_is_hole(es1))
557bd384364SZheng Liu 		return 1;
558bd384364SZheng Liu 
559bd384364SZheng Liu 	/* we need to check delayed extent is without unwritten status */
560bd384364SZheng Liu 	if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
561bd384364SZheng Liu 		return 1;
562bd384364SZheng Liu 
563bd384364SZheng Liu 	return 0;
56406b0c886SZheng Liu }
56506b0c886SZheng Liu 
566654598beSZheng Liu static struct extent_status *
ext4_es_try_to_merge_left(struct inode * inode,struct extent_status * es)567bdedbb7bSZheng Liu ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
568654598beSZheng Liu {
569bdedbb7bSZheng Liu 	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
570654598beSZheng Liu 	struct extent_status *es1;
571654598beSZheng Liu 	struct rb_node *node;
572654598beSZheng Liu 
573654598beSZheng Liu 	node = rb_prev(&es->rb_node);
574654598beSZheng Liu 	if (!node)
575654598beSZheng Liu 		return es;
576654598beSZheng Liu 
577654598beSZheng Liu 	es1 = rb_entry(node, struct extent_status, rb_node);
57806b0c886SZheng Liu 	if (ext4_es_can_be_merged(es1, es)) {
57906b0c886SZheng Liu 		es1->es_len += es->es_len;
5802be12de9SJan Kara 		if (ext4_es_is_referenced(es))
5812be12de9SJan Kara 			ext4_es_set_referenced(es1);
582654598beSZheng Liu 		rb_erase(&es->rb_node, &tree->root);
583bdedbb7bSZheng Liu 		ext4_es_free_extent(inode, es);
584654598beSZheng Liu 		es = es1;
585654598beSZheng Liu 	}
586654598beSZheng Liu 
587654598beSZheng Liu 	return es;
588654598beSZheng Liu }
589654598beSZheng Liu 
590654598beSZheng Liu static struct extent_status *
ext4_es_try_to_merge_right(struct inode * inode,struct extent_status * es)591bdedbb7bSZheng Liu ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
592654598beSZheng Liu {
593bdedbb7bSZheng Liu 	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
594654598beSZheng Liu 	struct extent_status *es1;
595654598beSZheng Liu 	struct rb_node *node;
596654598beSZheng Liu 
597654598beSZheng Liu 	node = rb_next(&es->rb_node);
598654598beSZheng Liu 	if (!node)
599654598beSZheng Liu 		return es;
600654598beSZheng Liu 
601654598beSZheng Liu 	es1 = rb_entry(node, struct extent_status, rb_node);
60206b0c886SZheng Liu 	if (ext4_es_can_be_merged(es, es1)) {
60306b0c886SZheng Liu 		es->es_len += es1->es_len;
6042be12de9SJan Kara 		if (ext4_es_is_referenced(es1))
6052be12de9SJan Kara 			ext4_es_set_referenced(es);
606654598beSZheng Liu 		rb_erase(node, &tree->root);
607bdedbb7bSZheng Liu 		ext4_es_free_extent(inode, es1);
608654598beSZheng Liu 	}
609654598beSZheng Liu 
610654598beSZheng Liu 	return es;
611654598beSZheng Liu }
612654598beSZheng Liu 
613921f266bSDmitry Monakhov #ifdef ES_AGGRESSIVE_TEST
614d7b2a00cSZheng Liu #include "ext4_extents.h"	/* Needed when ES_AGGRESSIVE_TEST is defined */
615d7b2a00cSZheng Liu 
ext4_es_insert_extent_ext_check(struct inode * inode,struct extent_status * es)616921f266bSDmitry Monakhov static void ext4_es_insert_extent_ext_check(struct inode *inode,
617921f266bSDmitry Monakhov 					    struct extent_status *es)
618921f266bSDmitry Monakhov {
619921f266bSDmitry Monakhov 	struct ext4_ext_path *path = NULL;
620921f266bSDmitry Monakhov 	struct ext4_extent *ex;
621921f266bSDmitry Monakhov 	ext4_lblk_t ee_block;
622921f266bSDmitry Monakhov 	ext4_fsblk_t ee_start;
623921f266bSDmitry Monakhov 	unsigned short ee_len;
624921f266bSDmitry Monakhov 	int depth, ee_status, es_status;
625921f266bSDmitry Monakhov 
626ed8a1a76STheodore Ts'o 	path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
627921f266bSDmitry Monakhov 	if (IS_ERR(path))
628921f266bSDmitry Monakhov 		return;
629921f266bSDmitry Monakhov 
630921f266bSDmitry Monakhov 	depth = ext_depth(inode);
631921f266bSDmitry Monakhov 	ex = path[depth].p_ext;
632921f266bSDmitry Monakhov 
633921f266bSDmitry Monakhov 	if (ex) {
634921f266bSDmitry Monakhov 
635921f266bSDmitry Monakhov 		ee_block = le32_to_cpu(ex->ee_block);
636921f266bSDmitry Monakhov 		ee_start = ext4_ext_pblock(ex);
637921f266bSDmitry Monakhov 		ee_len = ext4_ext_get_actual_len(ex);
638921f266bSDmitry Monakhov 
639556615dcSLukas Czerner 		ee_status = ext4_ext_is_unwritten(ex) ? 1 : 0;
640921f266bSDmitry Monakhov 		es_status = ext4_es_is_unwritten(es) ? 1 : 0;
641921f266bSDmitry Monakhov 
642921f266bSDmitry Monakhov 		/*
643921f266bSDmitry Monakhov 		 * Make sure ex and es are not overlap when we try to insert
644921f266bSDmitry Monakhov 		 * a delayed/hole extent.
645921f266bSDmitry Monakhov 		 */
646921f266bSDmitry Monakhov 		if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
647921f266bSDmitry Monakhov 			if (in_range(es->es_lblk, ee_block, ee_len)) {
648bdafe42aSTheodore Ts'o 				pr_warn("ES insert assertion failed for "
649921f266bSDmitry Monakhov 					"inode: %lu we can find an extent "
650921f266bSDmitry Monakhov 					"at block [%d/%d/%llu/%c], but we "
651ce140cddSEric Whitney 					"want to add a delayed/hole extent "
652ce140cddSEric Whitney 					"[%d/%d/%llu/%x]\n",
653921f266bSDmitry Monakhov 					inode->i_ino, ee_block, ee_len,
654921f266bSDmitry Monakhov 					ee_start, ee_status ? 'u' : 'w',
655921f266bSDmitry Monakhov 					es->es_lblk, es->es_len,
656921f266bSDmitry Monakhov 					ext4_es_pblock(es), ext4_es_status(es));
657921f266bSDmitry Monakhov 			}
658921f266bSDmitry Monakhov 			goto out;
659921f266bSDmitry Monakhov 		}
660921f266bSDmitry Monakhov 
661921f266bSDmitry Monakhov 		/*
662921f266bSDmitry Monakhov 		 * We don't check ee_block == es->es_lblk, etc. because es
663921f266bSDmitry Monakhov 		 * might be a part of whole extent, vice versa.
664921f266bSDmitry Monakhov 		 */
665921f266bSDmitry Monakhov 		if (es->es_lblk < ee_block ||
666921f266bSDmitry Monakhov 		    ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
667bdafe42aSTheodore Ts'o 			pr_warn("ES insert assertion failed for inode: %lu "
668921f266bSDmitry Monakhov 				"ex_status [%d/%d/%llu/%c] != "
669921f266bSDmitry Monakhov 				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
670921f266bSDmitry Monakhov 				ee_block, ee_len, ee_start,
671921f266bSDmitry Monakhov 				ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
672921f266bSDmitry Monakhov 				ext4_es_pblock(es), es_status ? 'u' : 'w');
673921f266bSDmitry Monakhov 			goto out;
674921f266bSDmitry Monakhov 		}
675921f266bSDmitry Monakhov 
676921f266bSDmitry Monakhov 		if (ee_status ^ es_status) {
677bdafe42aSTheodore Ts'o 			pr_warn("ES insert assertion failed for inode: %lu "
678921f266bSDmitry Monakhov 				"ex_status [%d/%d/%llu/%c] != "
679921f266bSDmitry Monakhov 				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
680921f266bSDmitry Monakhov 				ee_block, ee_len, ee_start,
681921f266bSDmitry Monakhov 				ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
682921f266bSDmitry Monakhov 				ext4_es_pblock(es), es_status ? 'u' : 'w');
683921f266bSDmitry Monakhov 		}
684921f266bSDmitry Monakhov 	} else {
685921f266bSDmitry Monakhov 		/*
686921f266bSDmitry Monakhov 		 * We can't find an extent on disk.  So we need to make sure
687921f266bSDmitry Monakhov 		 * that we don't want to add an written/unwritten extent.
688921f266bSDmitry Monakhov 		 */
689921f266bSDmitry Monakhov 		if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
690bdafe42aSTheodore Ts'o 			pr_warn("ES insert assertion failed for inode: %lu "
691921f266bSDmitry Monakhov 				"can't find an extent at block %d but we want "
692ce140cddSEric Whitney 				"to add a written/unwritten extent "
693ce140cddSEric Whitney 				"[%d/%d/%llu/%x]\n", inode->i_ino,
694921f266bSDmitry Monakhov 				es->es_lblk, es->es_lblk, es->es_len,
695921f266bSDmitry Monakhov 				ext4_es_pblock(es), ext4_es_status(es));
696921f266bSDmitry Monakhov 		}
697921f266bSDmitry Monakhov 	}
698921f266bSDmitry Monakhov out:
6997ff5fddaSYe Bin 	ext4_free_ext_path(path);
700921f266bSDmitry Monakhov }
701921f266bSDmitry Monakhov 
ext4_es_insert_extent_ind_check(struct inode * inode,struct extent_status * es)702921f266bSDmitry Monakhov static void ext4_es_insert_extent_ind_check(struct inode *inode,
703921f266bSDmitry Monakhov 					    struct extent_status *es)
704921f266bSDmitry Monakhov {
705921f266bSDmitry Monakhov 	struct ext4_map_blocks map;
706921f266bSDmitry Monakhov 	int retval;
707921f266bSDmitry Monakhov 
708921f266bSDmitry Monakhov 	/*
709921f266bSDmitry Monakhov 	 * Here we call ext4_ind_map_blocks to lookup a block mapping because
710921f266bSDmitry Monakhov 	 * 'Indirect' structure is defined in indirect.c.  So we couldn't
711921f266bSDmitry Monakhov 	 * access direct/indirect tree from outside.  It is too dirty to define
712921f266bSDmitry Monakhov 	 * this function in indirect.c file.
713921f266bSDmitry Monakhov 	 */
714921f266bSDmitry Monakhov 
715921f266bSDmitry Monakhov 	map.m_lblk = es->es_lblk;
716921f266bSDmitry Monakhov 	map.m_len = es->es_len;
717921f266bSDmitry Monakhov 
718921f266bSDmitry Monakhov 	retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
719921f266bSDmitry Monakhov 	if (retval > 0) {
720921f266bSDmitry Monakhov 		if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
721921f266bSDmitry Monakhov 			/*
722921f266bSDmitry Monakhov 			 * We want to add a delayed/hole extent but this
723921f266bSDmitry Monakhov 			 * block has been allocated.
724921f266bSDmitry Monakhov 			 */
725bdafe42aSTheodore Ts'o 			pr_warn("ES insert assertion failed for inode: %lu "
726921f266bSDmitry Monakhov 				"We can find blocks but we want to add a "
727ce140cddSEric Whitney 				"delayed/hole extent [%d/%d/%llu/%x]\n",
728921f266bSDmitry Monakhov 				inode->i_ino, es->es_lblk, es->es_len,
729921f266bSDmitry Monakhov 				ext4_es_pblock(es), ext4_es_status(es));
730921f266bSDmitry Monakhov 			return;
731921f266bSDmitry Monakhov 		} else if (ext4_es_is_written(es)) {
732921f266bSDmitry Monakhov 			if (retval != es->es_len) {
733bdafe42aSTheodore Ts'o 				pr_warn("ES insert assertion failed for "
734921f266bSDmitry Monakhov 					"inode: %lu retval %d != es_len %d\n",
735921f266bSDmitry Monakhov 					inode->i_ino, retval, es->es_len);
736921f266bSDmitry Monakhov 				return;
737921f266bSDmitry Monakhov 			}
738921f266bSDmitry Monakhov 			if (map.m_pblk != ext4_es_pblock(es)) {
739bdafe42aSTheodore Ts'o 				pr_warn("ES insert assertion failed for "
740921f266bSDmitry Monakhov 					"inode: %lu m_pblk %llu != "
741921f266bSDmitry Monakhov 					"es_pblk %llu\n",
742921f266bSDmitry Monakhov 					inode->i_ino, map.m_pblk,
743921f266bSDmitry Monakhov 					ext4_es_pblock(es));
744921f266bSDmitry Monakhov 				return;
745921f266bSDmitry Monakhov 			}
746921f266bSDmitry Monakhov 		} else {
747921f266bSDmitry Monakhov 			/*
748921f266bSDmitry Monakhov 			 * We don't need to check unwritten extent because
749921f266bSDmitry Monakhov 			 * indirect-based file doesn't have it.
750921f266bSDmitry Monakhov 			 */
7511e83bc81SArnd Bergmann 			BUG();
752921f266bSDmitry Monakhov 		}
753921f266bSDmitry Monakhov 	} else if (retval == 0) {
754921f266bSDmitry Monakhov 		if (ext4_es_is_written(es)) {
755bdafe42aSTheodore Ts'o 			pr_warn("ES insert assertion failed for inode: %lu "
756921f266bSDmitry Monakhov 				"We can't find the block but we want to add "
757ce140cddSEric Whitney 				"a written extent [%d/%d/%llu/%x]\n",
758921f266bSDmitry Monakhov 				inode->i_ino, es->es_lblk, es->es_len,
759921f266bSDmitry Monakhov 				ext4_es_pblock(es), ext4_es_status(es));
760921f266bSDmitry Monakhov 			return;
761921f266bSDmitry Monakhov 		}
762921f266bSDmitry Monakhov 	}
763921f266bSDmitry Monakhov }
764921f266bSDmitry Monakhov 
ext4_es_insert_extent_check(struct inode * inode,struct extent_status * es)765921f266bSDmitry Monakhov static inline void ext4_es_insert_extent_check(struct inode *inode,
766921f266bSDmitry Monakhov 					       struct extent_status *es)
767921f266bSDmitry Monakhov {
768921f266bSDmitry Monakhov 	/*
769921f266bSDmitry Monakhov 	 * We don't need to worry about the race condition because
770921f266bSDmitry Monakhov 	 * caller takes i_data_sem locking.
771921f266bSDmitry Monakhov 	 */
772921f266bSDmitry Monakhov 	BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
773921f266bSDmitry Monakhov 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
774921f266bSDmitry Monakhov 		ext4_es_insert_extent_ext_check(inode, es);
775921f266bSDmitry Monakhov 	else
776921f266bSDmitry Monakhov 		ext4_es_insert_extent_ind_check(inode, es);
777921f266bSDmitry Monakhov }
778921f266bSDmitry Monakhov #else
ext4_es_insert_extent_check(struct inode * inode,struct extent_status * es)779921f266bSDmitry Monakhov static inline void ext4_es_insert_extent_check(struct inode *inode,
780921f266bSDmitry Monakhov 					       struct extent_status *es)
781921f266bSDmitry Monakhov {
782921f266bSDmitry Monakhov }
783921f266bSDmitry Monakhov #endif
784921f266bSDmitry Monakhov 
__es_insert_extent(struct inode * inode,struct extent_status * newes,struct extent_status * prealloc)78595f0b320SBaokun Li static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
78695f0b320SBaokun Li 			      struct extent_status *prealloc)
787654598beSZheng Liu {
788bdedbb7bSZheng Liu 	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
789654598beSZheng Liu 	struct rb_node **p = &tree->root.rb_node;
790654598beSZheng Liu 	struct rb_node *parent = NULL;
791654598beSZheng Liu 	struct extent_status *es;
792654598beSZheng Liu 
793654598beSZheng Liu 	while (*p) {
794654598beSZheng Liu 		parent = *p;
795654598beSZheng Liu 		es = rb_entry(parent, struct extent_status, rb_node);
796654598beSZheng Liu 
79706b0c886SZheng Liu 		if (newes->es_lblk < es->es_lblk) {
79806b0c886SZheng Liu 			if (ext4_es_can_be_merged(newes, es)) {
79906b0c886SZheng Liu 				/*
80006b0c886SZheng Liu 				 * Here we can modify es_lblk directly
80106b0c886SZheng Liu 				 * because it isn't overlapped.
80206b0c886SZheng Liu 				 */
80306b0c886SZheng Liu 				es->es_lblk = newes->es_lblk;
80406b0c886SZheng Liu 				es->es_len += newes->es_len;
805fdc0212eSZheng Liu 				if (ext4_es_is_written(es) ||
806fdc0212eSZheng Liu 				    ext4_es_is_unwritten(es))
807fdc0212eSZheng Liu 					ext4_es_store_pblock(es,
808fdc0212eSZheng Liu 							     newes->es_pblk);
809bdedbb7bSZheng Liu 				es = ext4_es_try_to_merge_left(inode, es);
810654598beSZheng Liu 				goto out;
811654598beSZheng Liu 			}
812654598beSZheng Liu 			p = &(*p)->rb_left;
81306b0c886SZheng Liu 		} else if (newes->es_lblk > ext4_es_end(es)) {
81406b0c886SZheng Liu 			if (ext4_es_can_be_merged(es, newes)) {
81506b0c886SZheng Liu 				es->es_len += newes->es_len;
816bdedbb7bSZheng Liu 				es = ext4_es_try_to_merge_right(inode, es);
817654598beSZheng Liu 				goto out;
818654598beSZheng Liu 			}
819654598beSZheng Liu 			p = &(*p)->rb_right;
820654598beSZheng Liu 		} else {
8211e83bc81SArnd Bergmann 			BUG();
82206b0c886SZheng Liu 			return -EINVAL;
823654598beSZheng Liu 		}
824654598beSZheng Liu 	}
825654598beSZheng Liu 
82695f0b320SBaokun Li 	if (prealloc)
82795f0b320SBaokun Li 		es = prealloc;
82895f0b320SBaokun Li 	else
82973a2f033SBaokun Li 		es = __es_alloc_extent(false);
830654598beSZheng Liu 	if (!es)
831654598beSZheng Liu 		return -ENOMEM;
83273a2f033SBaokun Li 	ext4_es_init_extent(inode, es, newes->es_lblk, newes->es_len,
83373a2f033SBaokun Li 			    newes->es_pblk);
83473a2f033SBaokun Li 
835654598beSZheng Liu 	rb_link_node(&es->rb_node, parent, p);
836654598beSZheng Liu 	rb_insert_color(&es->rb_node, &tree->root);
837654598beSZheng Liu 
838654598beSZheng Liu out:
839654598beSZheng Liu 	tree->cache_es = es;
840654598beSZheng Liu 	return 0;
841654598beSZheng Liu }
842654598beSZheng Liu 
843654598beSZheng Liu /*
844bdafe42aSTheodore Ts'o  * ext4_es_insert_extent() adds information to an inode's extent
845bdafe42aSTheodore Ts'o  * status tree.
846654598beSZheng Liu  */
ext4_es_insert_extent(struct inode * inode,ext4_lblk_t lblk,ext4_lblk_t len,ext4_fsblk_t pblk,unsigned int status)8476c120399SBaokun Li void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
848fdc0212eSZheng Liu 			   ext4_lblk_t len, ext4_fsblk_t pblk,
8493be78c73STheodore Ts'o 			   unsigned int status)
850654598beSZheng Liu {
85106b0c886SZheng Liu 	struct extent_status newes;
85206b0c886SZheng Liu 	ext4_lblk_t end = lblk + len - 1;
853*8e387c89SZhang Yi 	int err1 = 0, err2 = 0, err3 = 0;
854b6bf9171SEric Whitney 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
8552a69c450SBaokun Li 	struct extent_status *es1 = NULL;
8562a69c450SBaokun Li 	struct extent_status *es2 = NULL;
857*8e387c89SZhang Yi 	struct pending_reservation *pr = NULL;
858*8e387c89SZhang Yi 	bool revise_pending = false;
859654598beSZheng Liu 
8608016e29fSHarshad Shirwadkar 	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
8616c120399SBaokun Li 		return;
8628016e29fSHarshad Shirwadkar 
8633be78c73STheodore Ts'o 	es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
864fdc0212eSZheng Liu 		 lblk, len, pblk, status, inode->i_ino);
86506b0c886SZheng Liu 
866d4381472SEryu Guan 	if (!len)
8676c120399SBaokun Li 		return;
868d4381472SEryu Guan 
86906b0c886SZheng Liu 	BUG_ON(end < lblk);
87006b0c886SZheng Liu 
871d2dc317dSLukas Czerner 	if ((status & EXTENT_STATUS_DELAYED) &&
872d2dc317dSLukas Czerner 	    (status & EXTENT_STATUS_WRITTEN)) {
873d2dc317dSLukas Czerner 		ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
874d2dc317dSLukas Czerner 				" delayed and written which can potentially "
8758d2ae1cbSJakub Wilk 				" cause data loss.", lblk, len);
876d2dc317dSLukas Czerner 		WARN_ON(1);
877d2dc317dSLukas Czerner 	}
878d2dc317dSLukas Czerner 
87906b0c886SZheng Liu 	newes.es_lblk = lblk;
88006b0c886SZheng Liu 	newes.es_len = len;
8819a6633b1STheodore Ts'o 	ext4_es_store_pblock_status(&newes, pblk, status);
882fdc0212eSZheng Liu 	trace_ext4_es_insert_extent(inode, &newes);
883654598beSZheng Liu 
884921f266bSDmitry Monakhov 	ext4_es_insert_extent_check(inode, &newes);
885921f266bSDmitry Monakhov 
886*8e387c89SZhang Yi 	revise_pending = sbi->s_cluster_ratio > 1 &&
887*8e387c89SZhang Yi 			 test_opt(inode->i_sb, DELALLOC) &&
888*8e387c89SZhang Yi 			 (status & (EXTENT_STATUS_WRITTEN |
889*8e387c89SZhang Yi 				    EXTENT_STATUS_UNWRITTEN));
890e15f742cSTheodore Ts'o retry:
8912a69c450SBaokun Li 	if (err1 && !es1)
8922a69c450SBaokun Li 		es1 = __es_alloc_extent(true);
8932a69c450SBaokun Li 	if ((err1 || err2) && !es2)
8942a69c450SBaokun Li 		es2 = __es_alloc_extent(true);
895*8e387c89SZhang Yi 	if ((err1 || err2 || err3) && revise_pending && !pr)
896*8e387c89SZhang Yi 		pr = __alloc_pending(true);
8972a69c450SBaokun Li 	write_lock(&EXT4_I(inode)->i_es_lock);
8982a69c450SBaokun Li 
8992a69c450SBaokun Li 	err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
9002a69c450SBaokun Li 	if (err1 != 0)
9012a69c450SBaokun Li 		goto error;
902768d612fSBaokun Li 	/* Free preallocated extent if it didn't get used. */
903768d612fSBaokun Li 	if (es1) {
904768d612fSBaokun Li 		if (!es1->es_len)
905768d612fSBaokun Li 			__es_free_extent(es1);
906768d612fSBaokun Li 		es1 = NULL;
907768d612fSBaokun Li 	}
9082a69c450SBaokun Li 
9092a69c450SBaokun Li 	err2 = __es_insert_extent(inode, &newes, es2);
9102a69c450SBaokun Li 	if (err2 == -ENOMEM && !ext4_es_must_keep(&newes))
9112a69c450SBaokun Li 		err2 = 0;
9122a69c450SBaokun Li 	if (err2 != 0)
9132a69c450SBaokun Li 		goto error;
914768d612fSBaokun Li 	/* Free preallocated extent if it didn't get used. */
915768d612fSBaokun Li 	if (es2) {
916768d612fSBaokun Li 		if (!es2->es_len)
917768d612fSBaokun Li 			__es_free_extent(es2);
918768d612fSBaokun Li 		es2 = NULL;
919768d612fSBaokun Li 	}
92006b0c886SZheng Liu 
921*8e387c89SZhang Yi 	if (revise_pending) {
922*8e387c89SZhang Yi 		err3 = __revise_pending(inode, lblk, len, &pr);
923*8e387c89SZhang Yi 		if (err3 != 0)
924*8e387c89SZhang Yi 			goto error;
925*8e387c89SZhang Yi 		if (pr) {
926*8e387c89SZhang Yi 			__free_pending(pr);
927*8e387c89SZhang Yi 			pr = NULL;
928*8e387c89SZhang Yi 		}
929*8e387c89SZhang Yi 	}
93006b0c886SZheng Liu error:
931654598beSZheng Liu 	write_unlock(&EXT4_I(inode)->i_es_lock);
932*8e387c89SZhang Yi 	if (err1 || err2 || err3)
9332a69c450SBaokun Li 		goto retry;
934654598beSZheng Liu 
935654598beSZheng Liu 	ext4_es_print_tree(inode);
9366c120399SBaokun Li 	return;
937654598beSZheng Liu }
938654598beSZheng Liu 
939d100eef2SZheng Liu /*
940107a7bd3STheodore Ts'o  * ext4_es_cache_extent() inserts information into the extent status
941107a7bd3STheodore Ts'o  * tree if and only if there isn't information about the range in
942107a7bd3STheodore Ts'o  * question already.
943107a7bd3STheodore Ts'o  */
ext4_es_cache_extent(struct inode * inode,ext4_lblk_t lblk,ext4_lblk_t len,ext4_fsblk_t pblk,unsigned int status)944107a7bd3STheodore Ts'o void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
945107a7bd3STheodore Ts'o 			  ext4_lblk_t len, ext4_fsblk_t pblk,
946107a7bd3STheodore Ts'o 			  unsigned int status)
947107a7bd3STheodore Ts'o {
948107a7bd3STheodore Ts'o 	struct extent_status *es;
949107a7bd3STheodore Ts'o 	struct extent_status newes;
950107a7bd3STheodore Ts'o 	ext4_lblk_t end = lblk + len - 1;
951107a7bd3STheodore Ts'o 
9528016e29fSHarshad Shirwadkar 	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
9538016e29fSHarshad Shirwadkar 		return;
9548016e29fSHarshad Shirwadkar 
955107a7bd3STheodore Ts'o 	newes.es_lblk = lblk;
956107a7bd3STheodore Ts'o 	newes.es_len = len;
9579a6633b1STheodore Ts'o 	ext4_es_store_pblock_status(&newes, pblk, status);
958107a7bd3STheodore Ts'o 	trace_ext4_es_cache_extent(inode, &newes);
959107a7bd3STheodore Ts'o 
960107a7bd3STheodore Ts'o 	if (!len)
961107a7bd3STheodore Ts'o 		return;
962107a7bd3STheodore Ts'o 
963107a7bd3STheodore Ts'o 	BUG_ON(end < lblk);
964107a7bd3STheodore Ts'o 
965107a7bd3STheodore Ts'o 	write_lock(&EXT4_I(inode)->i_es_lock);
966107a7bd3STheodore Ts'o 
967107a7bd3STheodore Ts'o 	es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
9687869a4a6STheodore Ts'o 	if (!es || es->es_lblk > end)
96995f0b320SBaokun Li 		__es_insert_extent(inode, &newes, NULL);
970107a7bd3STheodore Ts'o 	write_unlock(&EXT4_I(inode)->i_es_lock);
971107a7bd3STheodore Ts'o }
972107a7bd3STheodore Ts'o 
973107a7bd3STheodore Ts'o /*
974d100eef2SZheng Liu  * ext4_es_lookup_extent() looks up an extent in extent status tree.
975d100eef2SZheng Liu  *
976d100eef2SZheng Liu  * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
977d100eef2SZheng Liu  *
978d100eef2SZheng Liu  * Return: 1 on found, 0 on not
979d100eef2SZheng Liu  */
ext4_es_lookup_extent(struct inode * inode,ext4_lblk_t lblk,ext4_lblk_t * next_lblk,struct extent_status * es)980d100eef2SZheng Liu int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
981bb5835edSTheodore Ts'o 			  ext4_lblk_t *next_lblk,
982d100eef2SZheng Liu 			  struct extent_status *es)
983d100eef2SZheng Liu {
984d100eef2SZheng Liu 	struct ext4_es_tree *tree;
985eb68d0e2SZheng Liu 	struct ext4_es_stats *stats;
986d100eef2SZheng Liu 	struct extent_status *es1 = NULL;
987d100eef2SZheng Liu 	struct rb_node *node;
988d100eef2SZheng Liu 	int found = 0;
989d100eef2SZheng Liu 
9908016e29fSHarshad Shirwadkar 	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
9918016e29fSHarshad Shirwadkar 		return 0;
9928016e29fSHarshad Shirwadkar 
993d100eef2SZheng Liu 	trace_ext4_es_lookup_extent_enter(inode, lblk);
994d100eef2SZheng Liu 	es_debug("lookup extent in block %u\n", lblk);
995d100eef2SZheng Liu 
996d100eef2SZheng Liu 	tree = &EXT4_I(inode)->i_es_tree;
997d100eef2SZheng Liu 	read_lock(&EXT4_I(inode)->i_es_lock);
998d100eef2SZheng Liu 
999d100eef2SZheng Liu 	/* find extent in cache firstly */
1000d100eef2SZheng Liu 	es->es_lblk = es->es_len = es->es_pblk = 0;
1001492888dfSJan Kara 	es1 = READ_ONCE(tree->cache_es);
1002492888dfSJan Kara 	if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) {
1003d100eef2SZheng Liu 		es_debug("%u cached by [%u/%u)\n",
1004d100eef2SZheng Liu 			 lblk, es1->es_lblk, es1->es_len);
1005d100eef2SZheng Liu 		found = 1;
1006d100eef2SZheng Liu 		goto out;
1007d100eef2SZheng Liu 	}
1008d100eef2SZheng Liu 
1009d100eef2SZheng Liu 	node = tree->root.rb_node;
1010d100eef2SZheng Liu 	while (node) {
1011d100eef2SZheng Liu 		es1 = rb_entry(node, struct extent_status, rb_node);
1012d100eef2SZheng Liu 		if (lblk < es1->es_lblk)
1013d100eef2SZheng Liu 			node = node->rb_left;
1014d100eef2SZheng Liu 		else if (lblk > ext4_es_end(es1))
1015d100eef2SZheng Liu 			node = node->rb_right;
1016d100eef2SZheng Liu 		else {
1017d100eef2SZheng Liu 			found = 1;
1018d100eef2SZheng Liu 			break;
1019d100eef2SZheng Liu 		}
1020d100eef2SZheng Liu 	}
1021d100eef2SZheng Liu 
1022d100eef2SZheng Liu out:
1023eb68d0e2SZheng Liu 	stats = &EXT4_SB(inode->i_sb)->s_es_stats;
1024d100eef2SZheng Liu 	if (found) {
1025d100eef2SZheng Liu 		BUG_ON(!es1);
1026d100eef2SZheng Liu 		es->es_lblk = es1->es_lblk;
1027d100eef2SZheng Liu 		es->es_len = es1->es_len;
1028d100eef2SZheng Liu 		es->es_pblk = es1->es_pblk;
102987d8a74bSJan Kara 		if (!ext4_es_is_referenced(es1))
103087d8a74bSJan Kara 			ext4_es_set_referenced(es1);
1031520f897aSYang Guo 		percpu_counter_inc(&stats->es_stats_cache_hits);
1032bb5835edSTheodore Ts'o 		if (next_lblk) {
1033bb5835edSTheodore Ts'o 			node = rb_next(&es1->rb_node);
1034bb5835edSTheodore Ts'o 			if (node) {
1035bb5835edSTheodore Ts'o 				es1 = rb_entry(node, struct extent_status,
1036bb5835edSTheodore Ts'o 					       rb_node);
1037bb5835edSTheodore Ts'o 				*next_lblk = es1->es_lblk;
1038bb5835edSTheodore Ts'o 			} else
1039bb5835edSTheodore Ts'o 				*next_lblk = 0;
1040bb5835edSTheodore Ts'o 		}
1041eb68d0e2SZheng Liu 	} else {
1042520f897aSYang Guo 		percpu_counter_inc(&stats->es_stats_cache_misses);
1043d100eef2SZheng Liu 	}
1044d100eef2SZheng Liu 
1045d100eef2SZheng Liu 	read_unlock(&EXT4_I(inode)->i_es_lock);
1046d100eef2SZheng Liu 
1047d100eef2SZheng Liu 	trace_ext4_es_lookup_extent_exit(inode, es, found);
1048d100eef2SZheng Liu 	return found;
1049d100eef2SZheng Liu }
1050d100eef2SZheng Liu 
10518fcc3a58SEric Whitney struct rsvd_count {
10528fcc3a58SEric Whitney 	int ndelonly;
10538fcc3a58SEric Whitney 	bool first_do_lblk_found;
10548fcc3a58SEric Whitney 	ext4_lblk_t first_do_lblk;
10558fcc3a58SEric Whitney 	ext4_lblk_t last_do_lblk;
10568fcc3a58SEric Whitney 	struct extent_status *left_es;
10578fcc3a58SEric Whitney 	bool partial;
10588fcc3a58SEric Whitney 	ext4_lblk_t lclu;
10598fcc3a58SEric Whitney };
10608fcc3a58SEric Whitney 
10618fcc3a58SEric Whitney /*
10628fcc3a58SEric Whitney  * init_rsvd - initialize reserved count data before removing block range
10638fcc3a58SEric Whitney  *	       in file from extent status tree
10648fcc3a58SEric Whitney  *
10658fcc3a58SEric Whitney  * @inode - file containing range
10668fcc3a58SEric Whitney  * @lblk - first block in range
10678fcc3a58SEric Whitney  * @es - pointer to first extent in range
10688fcc3a58SEric Whitney  * @rc - pointer to reserved count data
10698fcc3a58SEric Whitney  *
10708fcc3a58SEric Whitney  * Assumes es is not NULL
10718fcc3a58SEric Whitney  */
init_rsvd(struct inode * inode,ext4_lblk_t lblk,struct extent_status * es,struct rsvd_count * rc)10728fcc3a58SEric Whitney static void init_rsvd(struct inode *inode, ext4_lblk_t lblk,
10738fcc3a58SEric Whitney 		      struct extent_status *es, struct rsvd_count *rc)
10748fcc3a58SEric Whitney {
10758fcc3a58SEric Whitney 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
10768fcc3a58SEric Whitney 	struct rb_node *node;
10778fcc3a58SEric Whitney 
10788fcc3a58SEric Whitney 	rc->ndelonly = 0;
10798fcc3a58SEric Whitney 
10808fcc3a58SEric Whitney 	/*
10818fcc3a58SEric Whitney 	 * for bigalloc, note the first delonly block in the range has not
10828fcc3a58SEric Whitney 	 * been found, record the extent containing the block to the left of
10838fcc3a58SEric Whitney 	 * the region to be removed, if any, and note that there's no partial
10848fcc3a58SEric Whitney 	 * cluster to track
10858fcc3a58SEric Whitney 	 */
10868fcc3a58SEric Whitney 	if (sbi->s_cluster_ratio > 1) {
10878fcc3a58SEric Whitney 		rc->first_do_lblk_found = false;
10888fcc3a58SEric Whitney 		if (lblk > es->es_lblk) {
10898fcc3a58SEric Whitney 			rc->left_es = es;
10908fcc3a58SEric Whitney 		} else {
10918fcc3a58SEric Whitney 			node = rb_prev(&es->rb_node);
10928fcc3a58SEric Whitney 			rc->left_es = node ? rb_entry(node,
10938fcc3a58SEric Whitney 						      struct extent_status,
10948fcc3a58SEric Whitney 						      rb_node) : NULL;
10958fcc3a58SEric Whitney 		}
10968fcc3a58SEric Whitney 		rc->partial = false;
10978fcc3a58SEric Whitney 	}
10988fcc3a58SEric Whitney }
10998fcc3a58SEric Whitney 
11008fcc3a58SEric Whitney /*
11018fcc3a58SEric Whitney  * count_rsvd - count the clusters containing delayed and not unwritten
11028fcc3a58SEric Whitney  *		(delonly) blocks in a range within an extent and add to
11038fcc3a58SEric Whitney  *	        the running tally in rsvd_count
11048fcc3a58SEric Whitney  *
11058fcc3a58SEric Whitney  * @inode - file containing extent
11068fcc3a58SEric Whitney  * @lblk - first block in range
11078fcc3a58SEric Whitney  * @len - length of range in blocks
11088fcc3a58SEric Whitney  * @es - pointer to extent containing clusters to be counted
11098fcc3a58SEric Whitney  * @rc - pointer to reserved count data
11108fcc3a58SEric Whitney  *
11118fcc3a58SEric Whitney  * Tracks partial clusters found at the beginning and end of extents so
11128fcc3a58SEric Whitney  * they aren't overcounted when they span adjacent extents
11138fcc3a58SEric Whitney  */
count_rsvd(struct inode * inode,ext4_lblk_t lblk,long len,struct extent_status * es,struct rsvd_count * rc)11148fcc3a58SEric Whitney static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
11158fcc3a58SEric Whitney 		       struct extent_status *es, struct rsvd_count *rc)
11168fcc3a58SEric Whitney {
11178fcc3a58SEric Whitney 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
11188fcc3a58SEric Whitney 	ext4_lblk_t i, end, nclu;
11198fcc3a58SEric Whitney 
11208fcc3a58SEric Whitney 	if (!ext4_es_is_delonly(es))
11218fcc3a58SEric Whitney 		return;
11228fcc3a58SEric Whitney 
11238fcc3a58SEric Whitney 	WARN_ON(len <= 0);
11248fcc3a58SEric Whitney 
11258fcc3a58SEric Whitney 	if (sbi->s_cluster_ratio == 1) {
11268fcc3a58SEric Whitney 		rc->ndelonly += (int) len;
11278fcc3a58SEric Whitney 		return;
11288fcc3a58SEric Whitney 	}
11298fcc3a58SEric Whitney 
11308fcc3a58SEric Whitney 	/* bigalloc */
11318fcc3a58SEric Whitney 
11328fcc3a58SEric Whitney 	i = (lblk < es->es_lblk) ? es->es_lblk : lblk;
11338fcc3a58SEric Whitney 	end = lblk + (ext4_lblk_t) len - 1;
11348fcc3a58SEric Whitney 	end = (end > ext4_es_end(es)) ? ext4_es_end(es) : end;
11358fcc3a58SEric Whitney 
11368fcc3a58SEric Whitney 	/* record the first block of the first delonly extent seen */
113739c0ae16SJason Yan 	if (!rc->first_do_lblk_found) {
11388fcc3a58SEric Whitney 		rc->first_do_lblk = i;
11398fcc3a58SEric Whitney 		rc->first_do_lblk_found = true;
11408fcc3a58SEric Whitney 	}
11418fcc3a58SEric Whitney 
11428fcc3a58SEric Whitney 	/* update the last lblk in the region seen so far */
11438fcc3a58SEric Whitney 	rc->last_do_lblk = end;
11448fcc3a58SEric Whitney 
11458fcc3a58SEric Whitney 	/*
11468fcc3a58SEric Whitney 	 * if we're tracking a partial cluster and the current extent
11478fcc3a58SEric Whitney 	 * doesn't start with it, count it and stop tracking
11488fcc3a58SEric Whitney 	 */
11498fcc3a58SEric Whitney 	if (rc->partial && (rc->lclu != EXT4_B2C(sbi, i))) {
11508fcc3a58SEric Whitney 		rc->ndelonly++;
11518fcc3a58SEric Whitney 		rc->partial = false;
11528fcc3a58SEric Whitney 	}
11538fcc3a58SEric Whitney 
11548fcc3a58SEric Whitney 	/*
11558fcc3a58SEric Whitney 	 * if the first cluster doesn't start on a cluster boundary but
11568fcc3a58SEric Whitney 	 * ends on one, count it
11578fcc3a58SEric Whitney 	 */
11588fcc3a58SEric Whitney 	if (EXT4_LBLK_COFF(sbi, i) != 0) {
11598fcc3a58SEric Whitney 		if (end >= EXT4_LBLK_CFILL(sbi, i)) {
11608fcc3a58SEric Whitney 			rc->ndelonly++;
11618fcc3a58SEric Whitney 			rc->partial = false;
11628fcc3a58SEric Whitney 			i = EXT4_LBLK_CFILL(sbi, i) + 1;
11638fcc3a58SEric Whitney 		}
11648fcc3a58SEric Whitney 	}
11658fcc3a58SEric Whitney 
11668fcc3a58SEric Whitney 	/*
11678fcc3a58SEric Whitney 	 * if the current cluster starts on a cluster boundary, count the
11688fcc3a58SEric Whitney 	 * number of whole delonly clusters in the extent
11698fcc3a58SEric Whitney 	 */
11708fcc3a58SEric Whitney 	if ((i + sbi->s_cluster_ratio - 1) <= end) {
11718fcc3a58SEric Whitney 		nclu = (end - i + 1) >> sbi->s_cluster_bits;
11728fcc3a58SEric Whitney 		rc->ndelonly += nclu;
11738fcc3a58SEric Whitney 		i += nclu << sbi->s_cluster_bits;
11748fcc3a58SEric Whitney 	}
11758fcc3a58SEric Whitney 
11768fcc3a58SEric Whitney 	/*
11778fcc3a58SEric Whitney 	 * start tracking a partial cluster if there's a partial at the end
11788fcc3a58SEric Whitney 	 * of the current extent and we're not already tracking one
11798fcc3a58SEric Whitney 	 */
11808fcc3a58SEric Whitney 	if (!rc->partial && i <= end) {
11818fcc3a58SEric Whitney 		rc->partial = true;
11828fcc3a58SEric Whitney 		rc->lclu = EXT4_B2C(sbi, i);
11838fcc3a58SEric Whitney 	}
11848fcc3a58SEric Whitney }
11858fcc3a58SEric Whitney 
11868fcc3a58SEric Whitney /*
11878fcc3a58SEric Whitney  * __pr_tree_search - search for a pending cluster reservation
11888fcc3a58SEric Whitney  *
11898fcc3a58SEric Whitney  * @root - root of pending reservation tree
11908fcc3a58SEric Whitney  * @lclu - logical cluster to search for
11918fcc3a58SEric Whitney  *
11928fcc3a58SEric Whitney  * Returns the pending reservation for the cluster identified by @lclu
11938fcc3a58SEric Whitney  * if found.  If not, returns a reservation for the next cluster if any,
11948fcc3a58SEric Whitney  * and if not, returns NULL.
11958fcc3a58SEric Whitney  */
__pr_tree_search(struct rb_root * root,ext4_lblk_t lclu)11968fcc3a58SEric Whitney static struct pending_reservation *__pr_tree_search(struct rb_root *root,
11978fcc3a58SEric Whitney 						    ext4_lblk_t lclu)
11988fcc3a58SEric Whitney {
11998fcc3a58SEric Whitney 	struct rb_node *node = root->rb_node;
12008fcc3a58SEric Whitney 	struct pending_reservation *pr = NULL;
12018fcc3a58SEric Whitney 
12028fcc3a58SEric Whitney 	while (node) {
12038fcc3a58SEric Whitney 		pr = rb_entry(node, struct pending_reservation, rb_node);
12048fcc3a58SEric Whitney 		if (lclu < pr->lclu)
12058fcc3a58SEric Whitney 			node = node->rb_left;
12068fcc3a58SEric Whitney 		else if (lclu > pr->lclu)
12078fcc3a58SEric Whitney 			node = node->rb_right;
12088fcc3a58SEric Whitney 		else
12098fcc3a58SEric Whitney 			return pr;
12108fcc3a58SEric Whitney 	}
12118fcc3a58SEric Whitney 	if (pr && lclu < pr->lclu)
12128fcc3a58SEric Whitney 		return pr;
12138fcc3a58SEric Whitney 	if (pr && lclu > pr->lclu) {
12148fcc3a58SEric Whitney 		node = rb_next(&pr->rb_node);
12158fcc3a58SEric Whitney 		return node ? rb_entry(node, struct pending_reservation,
12168fcc3a58SEric Whitney 				       rb_node) : NULL;
12178fcc3a58SEric Whitney 	}
12188fcc3a58SEric Whitney 	return NULL;
12198fcc3a58SEric Whitney }
12208fcc3a58SEric Whitney 
12218fcc3a58SEric Whitney /*
12228fcc3a58SEric Whitney  * get_rsvd - calculates and returns the number of cluster reservations to be
12238fcc3a58SEric Whitney  *	      released when removing a block range from the extent status tree
12248fcc3a58SEric Whitney  *	      and releases any pending reservations within the range
12258fcc3a58SEric Whitney  *
12268fcc3a58SEric Whitney  * @inode - file containing block range
12278fcc3a58SEric Whitney  * @end - last block in range
12288fcc3a58SEric Whitney  * @right_es - pointer to extent containing next block beyond end or NULL
12298fcc3a58SEric Whitney  * @rc - pointer to reserved count data
12308fcc3a58SEric Whitney  *
12318fcc3a58SEric Whitney  * The number of reservations to be released is equal to the number of
12328fcc3a58SEric Whitney  * clusters containing delayed and not unwritten (delonly) blocks within
12338fcc3a58SEric Whitney  * the range, minus the number of clusters still containing delonly blocks
12348fcc3a58SEric Whitney  * at the ends of the range, and minus the number of pending reservations
12358fcc3a58SEric Whitney  * within the range.
12368fcc3a58SEric Whitney  */
get_rsvd(struct inode * inode,ext4_lblk_t end,struct extent_status * right_es,struct rsvd_count * rc)12378fcc3a58SEric Whitney static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
12388fcc3a58SEric Whitney 			     struct extent_status *right_es,
12398fcc3a58SEric Whitney 			     struct rsvd_count *rc)
12408fcc3a58SEric Whitney {
12418fcc3a58SEric Whitney 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
12428fcc3a58SEric Whitney 	struct pending_reservation *pr;
12438fcc3a58SEric Whitney 	struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
12448fcc3a58SEric Whitney 	struct rb_node *node;
12458fcc3a58SEric Whitney 	ext4_lblk_t first_lclu, last_lclu;
12468fcc3a58SEric Whitney 	bool left_delonly, right_delonly, count_pending;
12478fcc3a58SEric Whitney 	struct extent_status *es;
12488fcc3a58SEric Whitney 
12498fcc3a58SEric Whitney 	if (sbi->s_cluster_ratio > 1) {
12508fcc3a58SEric Whitney 		/* count any remaining partial cluster */
12518fcc3a58SEric Whitney 		if (rc->partial)
12528fcc3a58SEric Whitney 			rc->ndelonly++;
12538fcc3a58SEric Whitney 
12548fcc3a58SEric Whitney 		if (rc->ndelonly == 0)
12558fcc3a58SEric Whitney 			return 0;
12568fcc3a58SEric Whitney 
12578fcc3a58SEric Whitney 		first_lclu = EXT4_B2C(sbi, rc->first_do_lblk);
12588fcc3a58SEric Whitney 		last_lclu = EXT4_B2C(sbi, rc->last_do_lblk);
12598fcc3a58SEric Whitney 
12608fcc3a58SEric Whitney 		/*
12618fcc3a58SEric Whitney 		 * decrease the delonly count by the number of clusters at the
12628fcc3a58SEric Whitney 		 * ends of the range that still contain delonly blocks -
12638fcc3a58SEric Whitney 		 * these clusters still need to be reserved
12648fcc3a58SEric Whitney 		 */
12658fcc3a58SEric Whitney 		left_delonly = right_delonly = false;
12668fcc3a58SEric Whitney 
12678fcc3a58SEric Whitney 		es = rc->left_es;
12688fcc3a58SEric Whitney 		while (es && ext4_es_end(es) >=
12698fcc3a58SEric Whitney 		       EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) {
12708fcc3a58SEric Whitney 			if (ext4_es_is_delonly(es)) {
12718fcc3a58SEric Whitney 				rc->ndelonly--;
12728fcc3a58SEric Whitney 				left_delonly = true;
12738fcc3a58SEric Whitney 				break;
12748fcc3a58SEric Whitney 			}
12758fcc3a58SEric Whitney 			node = rb_prev(&es->rb_node);
12768fcc3a58SEric Whitney 			if (!node)
12778fcc3a58SEric Whitney 				break;
12788fcc3a58SEric Whitney 			es = rb_entry(node, struct extent_status, rb_node);
12798fcc3a58SEric Whitney 		}
12808fcc3a58SEric Whitney 		if (right_es && (!left_delonly || first_lclu != last_lclu)) {
12818fcc3a58SEric Whitney 			if (end < ext4_es_end(right_es)) {
12828fcc3a58SEric Whitney 				es = right_es;
12838fcc3a58SEric Whitney 			} else {
12848fcc3a58SEric Whitney 				node = rb_next(&right_es->rb_node);
12858fcc3a58SEric Whitney 				es = node ? rb_entry(node, struct extent_status,
12868fcc3a58SEric Whitney 						     rb_node) : NULL;
12878fcc3a58SEric Whitney 			}
12888fcc3a58SEric Whitney 			while (es && es->es_lblk <=
12898fcc3a58SEric Whitney 			       EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) {
12908fcc3a58SEric Whitney 				if (ext4_es_is_delonly(es)) {
12918fcc3a58SEric Whitney 					rc->ndelonly--;
12928fcc3a58SEric Whitney 					right_delonly = true;
12938fcc3a58SEric Whitney 					break;
12948fcc3a58SEric Whitney 				}
12958fcc3a58SEric Whitney 				node = rb_next(&es->rb_node);
12968fcc3a58SEric Whitney 				if (!node)
12978fcc3a58SEric Whitney 					break;
12988fcc3a58SEric Whitney 				es = rb_entry(node, struct extent_status,
12998fcc3a58SEric Whitney 					      rb_node);
13008fcc3a58SEric Whitney 			}
13018fcc3a58SEric Whitney 		}
13028fcc3a58SEric Whitney 
13038fcc3a58SEric Whitney 		/*
13048fcc3a58SEric Whitney 		 * Determine the block range that should be searched for
13058fcc3a58SEric Whitney 		 * pending reservations, if any.  Clusters on the ends of the
13068fcc3a58SEric Whitney 		 * original removed range containing delonly blocks are
13078fcc3a58SEric Whitney 		 * excluded.  They've already been accounted for and it's not
13088fcc3a58SEric Whitney 		 * possible to determine if an associated pending reservation
13098fcc3a58SEric Whitney 		 * should be released with the information available in the
13108fcc3a58SEric Whitney 		 * extents status tree.
13118fcc3a58SEric Whitney 		 */
13128fcc3a58SEric Whitney 		if (first_lclu == last_lclu) {
13138fcc3a58SEric Whitney 			if (left_delonly | right_delonly)
13148fcc3a58SEric Whitney 				count_pending = false;
13158fcc3a58SEric Whitney 			else
13168fcc3a58SEric Whitney 				count_pending = true;
13178fcc3a58SEric Whitney 		} else {
13188fcc3a58SEric Whitney 			if (left_delonly)
13198fcc3a58SEric Whitney 				first_lclu++;
13208fcc3a58SEric Whitney 			if (right_delonly)
13218fcc3a58SEric Whitney 				last_lclu--;
13228fcc3a58SEric Whitney 			if (first_lclu <= last_lclu)
13238fcc3a58SEric Whitney 				count_pending = true;
13248fcc3a58SEric Whitney 			else
13258fcc3a58SEric Whitney 				count_pending = false;
13268fcc3a58SEric Whitney 		}
13278fcc3a58SEric Whitney 
13288fcc3a58SEric Whitney 		/*
13298fcc3a58SEric Whitney 		 * a pending reservation found between first_lclu and last_lclu
13308fcc3a58SEric Whitney 		 * represents an allocated cluster that contained at least one
13318fcc3a58SEric Whitney 		 * delonly block, so the delonly total must be reduced by one
13328fcc3a58SEric Whitney 		 * for each pending reservation found and released
13338fcc3a58SEric Whitney 		 */
13348fcc3a58SEric Whitney 		if (count_pending) {
13358fcc3a58SEric Whitney 			pr = __pr_tree_search(&tree->root, first_lclu);
13368fcc3a58SEric Whitney 			while (pr && pr->lclu <= last_lclu) {
13378fcc3a58SEric Whitney 				rc->ndelonly--;
13388fcc3a58SEric Whitney 				node = rb_next(&pr->rb_node);
13398fcc3a58SEric Whitney 				rb_erase(&pr->rb_node, &tree->root);
1340*8e387c89SZhang Yi 				__free_pending(pr);
13418fcc3a58SEric Whitney 				if (!node)
13428fcc3a58SEric Whitney 					break;
13438fcc3a58SEric Whitney 				pr = rb_entry(node, struct pending_reservation,
13448fcc3a58SEric Whitney 					      rb_node);
13458fcc3a58SEric Whitney 			}
13468fcc3a58SEric Whitney 		}
13478fcc3a58SEric Whitney 	}
13488fcc3a58SEric Whitney 	return rc->ndelonly;
13498fcc3a58SEric Whitney }
13508fcc3a58SEric Whitney 
13518fcc3a58SEric Whitney 
13528fcc3a58SEric Whitney /*
13538fcc3a58SEric Whitney  * __es_remove_extent - removes block range from extent status tree
13548fcc3a58SEric Whitney  *
13558fcc3a58SEric Whitney  * @inode - file containing range
13568fcc3a58SEric Whitney  * @lblk - first block in range
13578fcc3a58SEric Whitney  * @end - last block in range
13588fcc3a58SEric Whitney  * @reserved - number of cluster reservations released
1359bda3efafSBaokun Li  * @prealloc - pre-allocated es to avoid memory allocation failures
13608fcc3a58SEric Whitney  *
13618fcc3a58SEric Whitney  * If @reserved is not NULL and delayed allocation is enabled, counts
13628fcc3a58SEric Whitney  * block/cluster reservations freed by removing range and if bigalloc
13638fcc3a58SEric Whitney  * enabled cancels pending reservations as needed. Returns 0 on success,
13648fcc3a58SEric Whitney  * error code on failure.
13658fcc3a58SEric Whitney  */
__es_remove_extent(struct inode * inode,ext4_lblk_t lblk,ext4_lblk_t end,int * reserved,struct extent_status * prealloc)1366bdedbb7bSZheng Liu static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
1367bda3efafSBaokun Li 			      ext4_lblk_t end, int *reserved,
1368bda3efafSBaokun Li 			      struct extent_status *prealloc)
1369654598beSZheng Liu {
1370bdedbb7bSZheng Liu 	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
1371654598beSZheng Liu 	struct rb_node *node;
1372654598beSZheng Liu 	struct extent_status *es;
1373654598beSZheng Liu 	struct extent_status orig_es;
137406b0c886SZheng Liu 	ext4_lblk_t len1, len2;
1375fdc0212eSZheng Liu 	ext4_fsblk_t block;
1376bda3efafSBaokun Li 	int err = 0;
13778fcc3a58SEric Whitney 	bool count_reserved = true;
13788fcc3a58SEric Whitney 	struct rsvd_count rc;
1379654598beSZheng Liu 
13808fcc3a58SEric Whitney 	if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))
13818fcc3a58SEric Whitney 		count_reserved = false;
13828fcc3a58SEric Whitney 
138306b0c886SZheng Liu 	es = __es_tree_search(&tree->root, lblk);
1384654598beSZheng Liu 	if (!es)
1385654598beSZheng Liu 		goto out;
138606b0c886SZheng Liu 	if (es->es_lblk > end)
1387654598beSZheng Liu 		goto out;
1388654598beSZheng Liu 
1389654598beSZheng Liu 	/* Simply invalidate cache_es. */
1390654598beSZheng Liu 	tree->cache_es = NULL;
13918fcc3a58SEric Whitney 	if (count_reserved)
13928fcc3a58SEric Whitney 		init_rsvd(inode, lblk, es, &rc);
1393654598beSZheng Liu 
139406b0c886SZheng Liu 	orig_es.es_lblk = es->es_lblk;
139506b0c886SZheng Liu 	orig_es.es_len = es->es_len;
1396fdc0212eSZheng Liu 	orig_es.es_pblk = es->es_pblk;
1397fdc0212eSZheng Liu 
139806b0c886SZheng Liu 	len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
139906b0c886SZheng Liu 	len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
1400654598beSZheng Liu 	if (len1 > 0)
140106b0c886SZheng Liu 		es->es_len = len1;
1402654598beSZheng Liu 	if (len2 > 0) {
1403654598beSZheng Liu 		if (len1 > 0) {
140406b0c886SZheng Liu 			struct extent_status newes;
140506b0c886SZheng Liu 
140606b0c886SZheng Liu 			newes.es_lblk = end + 1;
140706b0c886SZheng Liu 			newes.es_len = len2;
1408666525dfSChen Gang 			block = 0x7FDEADBEEFULL;
1409fdc0212eSZheng Liu 			if (ext4_es_is_written(&orig_es) ||
14109a6633b1STheodore Ts'o 			    ext4_es_is_unwritten(&orig_es))
1411fdc0212eSZheng Liu 				block = ext4_es_pblock(&orig_es) +
1412fdc0212eSZheng Liu 					orig_es.es_len - len2;
14139a6633b1STheodore Ts'o 			ext4_es_store_pblock_status(&newes, block,
14149a6633b1STheodore Ts'o 						    ext4_es_status(&orig_es));
1415bda3efafSBaokun Li 			err = __es_insert_extent(inode, &newes, prealloc);
1416654598beSZheng Liu 			if (err) {
1417bda3efafSBaokun Li 				if (!ext4_es_must_keep(&newes))
1418bda3efafSBaokun Li 					return 0;
1419bda3efafSBaokun Li 
142006b0c886SZheng Liu 				es->es_lblk = orig_es.es_lblk;
142106b0c886SZheng Liu 				es->es_len = orig_es.es_len;
1422654598beSZheng Liu 				goto out;
1423654598beSZheng Liu 			}
1424654598beSZheng Liu 		} else {
142506b0c886SZheng Liu 			es->es_lblk = end + 1;
142606b0c886SZheng Liu 			es->es_len = len2;
1427fdc0212eSZheng Liu 			if (ext4_es_is_written(es) ||
1428fdc0212eSZheng Liu 			    ext4_es_is_unwritten(es)) {
1429fdc0212eSZheng Liu 				block = orig_es.es_pblk + orig_es.es_len - len2;
1430fdc0212eSZheng Liu 				ext4_es_store_pblock(es, block);
1431fdc0212eSZheng Liu 			}
1432654598beSZheng Liu 		}
14338fcc3a58SEric Whitney 		if (count_reserved)
143440ea9839SZhang Yi 			count_rsvd(inode, orig_es.es_lblk + len1,
143540ea9839SZhang Yi 				   orig_es.es_len - len1 - len2, &orig_es, &rc);
14361da18e38SYe Bin 		goto out_get_reserved;
1437654598beSZheng Liu 	}
1438654598beSZheng Liu 
1439654598beSZheng Liu 	if (len1 > 0) {
14408fcc3a58SEric Whitney 		if (count_reserved)
14418fcc3a58SEric Whitney 			count_rsvd(inode, lblk, orig_es.es_len - len1,
14428fcc3a58SEric Whitney 				   &orig_es, &rc);
1443654598beSZheng Liu 		node = rb_next(&es->rb_node);
1444654598beSZheng Liu 		if (node)
1445654598beSZheng Liu 			es = rb_entry(node, struct extent_status, rb_node);
1446654598beSZheng Liu 		else
1447654598beSZheng Liu 			es = NULL;
1448654598beSZheng Liu 	}
1449654598beSZheng Liu 
145006b0c886SZheng Liu 	while (es && ext4_es_end(es) <= end) {
14518fcc3a58SEric Whitney 		if (count_reserved)
14528fcc3a58SEric Whitney 			count_rsvd(inode, es->es_lblk, es->es_len, es, &rc);
1453654598beSZheng Liu 		node = rb_next(&es->rb_node);
1454654598beSZheng Liu 		rb_erase(&es->rb_node, &tree->root);
1455bdedbb7bSZheng Liu 		ext4_es_free_extent(inode, es);
1456654598beSZheng Liu 		if (!node) {
1457654598beSZheng Liu 			es = NULL;
1458654598beSZheng Liu 			break;
1459654598beSZheng Liu 		}
1460654598beSZheng Liu 		es = rb_entry(node, struct extent_status, rb_node);
1461654598beSZheng Liu 	}
1462654598beSZheng Liu 
146306b0c886SZheng Liu 	if (es && es->es_lblk < end + 1) {
1464fdc0212eSZheng Liu 		ext4_lblk_t orig_len = es->es_len;
1465fdc0212eSZheng Liu 
146606b0c886SZheng Liu 		len1 = ext4_es_end(es) - end;
14678fcc3a58SEric Whitney 		if (count_reserved)
14688fcc3a58SEric Whitney 			count_rsvd(inode, es->es_lblk, orig_len - len1,
14698fcc3a58SEric Whitney 				   es, &rc);
147006b0c886SZheng Liu 		es->es_lblk = end + 1;
147106b0c886SZheng Liu 		es->es_len = len1;
1472fdc0212eSZheng Liu 		if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
1473fdc0212eSZheng Liu 			block = es->es_pblk + orig_len - len1;
1474fdc0212eSZheng Liu 			ext4_es_store_pblock(es, block);
1475fdc0212eSZheng Liu 		}
1476654598beSZheng Liu 	}
1477654598beSZheng Liu 
14781da18e38SYe Bin out_get_reserved:
14798fcc3a58SEric Whitney 	if (count_reserved)
14808fcc3a58SEric Whitney 		*reserved = get_rsvd(inode, end, es, &rc);
1481654598beSZheng Liu out:
148206b0c886SZheng Liu 	return err;
148306b0c886SZheng Liu }
148406b0c886SZheng Liu 
148506b0c886SZheng Liu /*
14868fcc3a58SEric Whitney  * ext4_es_remove_extent - removes block range from extent status tree
148706b0c886SZheng Liu  *
14888fcc3a58SEric Whitney  * @inode - file containing range
14898fcc3a58SEric Whitney  * @lblk - first block in range
14908fcc3a58SEric Whitney  * @len - number of blocks to remove
14918fcc3a58SEric Whitney  *
14928fcc3a58SEric Whitney  * Reduces block/cluster reservation count and for bigalloc cancels pending
1493ed5d285bSBaokun Li  * reservations as needed.
149406b0c886SZheng Liu  */
ext4_es_remove_extent(struct inode * inode,ext4_lblk_t lblk,ext4_lblk_t len)1495ed5d285bSBaokun Li void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
149606b0c886SZheng Liu 			   ext4_lblk_t len)
149706b0c886SZheng Liu {
149806b0c886SZheng Liu 	ext4_lblk_t end;
149906b0c886SZheng Liu 	int err = 0;
15008fcc3a58SEric Whitney 	int reserved = 0;
1501e9fe2b88SBaokun Li 	struct extent_status *es = NULL;
150206b0c886SZheng Liu 
15038016e29fSHarshad Shirwadkar 	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
1504ed5d285bSBaokun Li 		return;
15058016e29fSHarshad Shirwadkar 
150606b0c886SZheng Liu 	trace_ext4_es_remove_extent(inode, lblk, len);
150706b0c886SZheng Liu 	es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
150806b0c886SZheng Liu 		 lblk, len, inode->i_ino);
150906b0c886SZheng Liu 
1510d4381472SEryu Guan 	if (!len)
1511ed5d285bSBaokun Li 		return;
1512d4381472SEryu Guan 
151306b0c886SZheng Liu 	end = lblk + len - 1;
151406b0c886SZheng Liu 	BUG_ON(end < lblk);
151506b0c886SZheng Liu 
1516e9fe2b88SBaokun Li retry:
1517e9fe2b88SBaokun Li 	if (err && !es)
1518e9fe2b88SBaokun Li 		es = __es_alloc_extent(true);
1519edaa53caSZheng Liu 	/*
1520edaa53caSZheng Liu 	 * ext4_clear_inode() depends on us taking i_es_lock unconditionally
1521edaa53caSZheng Liu 	 * so that we are sure __es_shrink() is done with the inode before it
1522edaa53caSZheng Liu 	 * is reclaimed.
1523edaa53caSZheng Liu 	 */
152406b0c886SZheng Liu 	write_lock(&EXT4_I(inode)->i_es_lock);
1525e9fe2b88SBaokun Li 	err = __es_remove_extent(inode, lblk, end, &reserved, es);
1526768d612fSBaokun Li 	/* Free preallocated extent if it didn't get used. */
1527768d612fSBaokun Li 	if (es) {
1528768d612fSBaokun Li 		if (!es->es_len)
1529e9fe2b88SBaokun Li 			__es_free_extent(es);
1530768d612fSBaokun Li 		es = NULL;
1531768d612fSBaokun Li 	}
1532654598beSZheng Liu 	write_unlock(&EXT4_I(inode)->i_es_lock);
1533e9fe2b88SBaokun Li 	if (err)
1534e9fe2b88SBaokun Li 		goto retry;
1535e9fe2b88SBaokun Li 
1536654598beSZheng Liu 	ext4_es_print_tree(inode);
15378fcc3a58SEric Whitney 	ext4_da_release_space(inode, reserved);
1538ed5d285bSBaokun Li 	return;
1539654598beSZheng Liu }
154074cd15cdSZheng Liu 
__es_shrink(struct ext4_sb_info * sbi,int nr_to_scan,struct ext4_inode_info * locked_ei)1541edaa53caSZheng Liu static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
1542e15f742cSTheodore Ts'o 		       struct ext4_inode_info *locked_ei)
154374cd15cdSZheng Liu {
154474cd15cdSZheng Liu 	struct ext4_inode_info *ei;
1545eb68d0e2SZheng Liu 	struct ext4_es_stats *es_stats;
1546eb68d0e2SZheng Liu 	ktime_t start_time;
1547eb68d0e2SZheng Liu 	u64 scan_time;
1548edaa53caSZheng Liu 	int nr_to_walk;
15491ab6c499SDave Chinner 	int nr_shrunk = 0;
1550edaa53caSZheng Liu 	int retried = 0, nr_skipped = 0;
155174cd15cdSZheng Liu 
1552eb68d0e2SZheng Liu 	es_stats = &sbi->s_es_stats;
1553eb68d0e2SZheng Liu 	start_time = ktime_get();
1554d3922a77SZheng Liu 
15557869a4a6STheodore Ts'o retry:
1556edaa53caSZheng Liu 	spin_lock(&sbi->s_es_lock);
1557edaa53caSZheng Liu 	nr_to_walk = sbi->s_es_nr_inode;
1558edaa53caSZheng Liu 	while (nr_to_walk-- > 0) {
1559edaa53caSZheng Liu 		if (list_empty(&sbi->s_es_list)) {
1560edaa53caSZheng Liu 			spin_unlock(&sbi->s_es_lock);
1561edaa53caSZheng Liu 			goto out;
1562edaa53caSZheng Liu 		}
1563edaa53caSZheng Liu 		ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
1564edaa53caSZheng Liu 				      i_es_list);
1565edaa53caSZheng Liu 		/* Move the inode to the tail */
1566dd475925SJan Kara 		list_move_tail(&ei->i_es_list, &sbi->s_es_list);
156774cd15cdSZheng Liu 
15687869a4a6STheodore Ts'o 		/*
1569edaa53caSZheng Liu 		 * Normally we try hard to avoid shrinking precached inodes,
1570edaa53caSZheng Liu 		 * but we will as a last resort.
15717869a4a6STheodore Ts'o 		 */
1572edaa53caSZheng Liu 		if (!retried && ext4_test_inode_state(&ei->vfs_inode,
1573edaa53caSZheng Liu 						EXT4_STATE_EXT_PRECACHED)) {
15747869a4a6STheodore Ts'o 			nr_skipped++;
157574cd15cdSZheng Liu 			continue;
157674cd15cdSZheng Liu 		}
1577d3922a77SZheng Liu 
1578edaa53caSZheng Liu 		if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
1579edaa53caSZheng Liu 			nr_skipped++;
1580d3922a77SZheng Liu 			continue;
1581edaa53caSZheng Liu 		}
1582edaa53caSZheng Liu 		/*
1583edaa53caSZheng Liu 		 * Now we hold i_es_lock which protects us from inode reclaim
1584edaa53caSZheng Liu 		 * freeing inode under us
1585edaa53caSZheng Liu 		 */
1586edaa53caSZheng Liu 		spin_unlock(&sbi->s_es_lock);
158774cd15cdSZheng Liu 
1588dd475925SJan Kara 		nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
158974cd15cdSZheng Liu 		write_unlock(&ei->i_es_lock);
159074cd15cdSZheng Liu 
1591dd475925SJan Kara 		if (nr_to_scan <= 0)
1592edaa53caSZheng Liu 			goto out;
1593edaa53caSZheng Liu 		spin_lock(&sbi->s_es_lock);
1594edaa53caSZheng Liu 	}
1595edaa53caSZheng Liu 	spin_unlock(&sbi->s_es_lock);
15967869a4a6STheodore Ts'o 
15977869a4a6STheodore Ts'o 	/*
15987869a4a6STheodore Ts'o 	 * If we skipped any inodes, and we weren't able to make any
1599edaa53caSZheng Liu 	 * forward progress, try again to scan precached inodes.
16007869a4a6STheodore Ts'o 	 */
16017869a4a6STheodore Ts'o 	if ((nr_shrunk == 0) && nr_skipped && !retried) {
16027869a4a6STheodore Ts'o 		retried++;
16037869a4a6STheodore Ts'o 		goto retry;
16047869a4a6STheodore Ts'o 	}
16057869a4a6STheodore Ts'o 
1606e15f742cSTheodore Ts'o 	if (locked_ei && nr_shrunk == 0)
1607dd475925SJan Kara 		nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);
1608e15f742cSTheodore Ts'o 
1609edaa53caSZheng Liu out:
1610eb68d0e2SZheng Liu 	scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
1611eb68d0e2SZheng Liu 	if (likely(es_stats->es_stats_scan_time))
1612eb68d0e2SZheng Liu 		es_stats->es_stats_scan_time = (scan_time +
1613eb68d0e2SZheng Liu 				es_stats->es_stats_scan_time*3) / 4;
1614eb68d0e2SZheng Liu 	else
1615eb68d0e2SZheng Liu 		es_stats->es_stats_scan_time = scan_time;
1616eb68d0e2SZheng Liu 	if (scan_time > es_stats->es_stats_max_scan_time)
1617eb68d0e2SZheng Liu 		es_stats->es_stats_max_scan_time = scan_time;
1618eb68d0e2SZheng Liu 	if (likely(es_stats->es_stats_shrunk))
1619eb68d0e2SZheng Liu 		es_stats->es_stats_shrunk = (nr_shrunk +
1620eb68d0e2SZheng Liu 				es_stats->es_stats_shrunk*3) / 4;
1621eb68d0e2SZheng Liu 	else
1622eb68d0e2SZheng Liu 		es_stats->es_stats_shrunk = nr_shrunk;
1623eb68d0e2SZheng Liu 
1624edaa53caSZheng Liu 	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
1625eb68d0e2SZheng Liu 			     nr_skipped, retried);
1626e15f742cSTheodore Ts'o 	return nr_shrunk;
1627e15f742cSTheodore Ts'o }
1628e15f742cSTheodore Ts'o 
ext4_es_count(struct shrinker * shrink,struct shrink_control * sc)16291ab6c499SDave Chinner static unsigned long ext4_es_count(struct shrinker *shrink,
16301ab6c499SDave Chinner 				   struct shrink_control *sc)
16311ab6c499SDave Chinner {
16321ab6c499SDave Chinner 	unsigned long nr;
16331ab6c499SDave Chinner 	struct ext4_sb_info *sbi;
16341ab6c499SDave Chinner 
16354d09d75dSQi Zheng 	sbi = shrink->private_data;
1636edaa53caSZheng Liu 	nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
1637e963bb1dSZheng Liu 	trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
16381ab6c499SDave Chinner 	return nr;
16391ab6c499SDave Chinner }
16401ab6c499SDave Chinner 
ext4_es_scan(struct shrinker * shrink,struct shrink_control * sc)16411ab6c499SDave Chinner static unsigned long ext4_es_scan(struct shrinker *shrink,
16421ab6c499SDave Chinner 				  struct shrink_control *sc)
1643e15f742cSTheodore Ts'o {
16444d09d75dSQi Zheng 	struct ext4_sb_info *sbi = shrink->private_data;
1645e15f742cSTheodore Ts'o 	int nr_to_scan = sc->nr_to_scan;
1646e15f742cSTheodore Ts'o 	int ret, nr_shrunk;
1647e15f742cSTheodore Ts'o 
1648edaa53caSZheng Liu 	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
1649e963bb1dSZheng Liu 	trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
1650e15f742cSTheodore Ts'o 
1651edaa53caSZheng Liu 	nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
1652e15f742cSTheodore Ts'o 
16534fb7c70aSZhang Yi 	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
1654e963bb1dSZheng Liu 	trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
16551ab6c499SDave Chinner 	return nr_shrunk;
165674cd15cdSZheng Liu }
165774cd15cdSZheng Liu 
ext4_seq_es_shrinker_info_show(struct seq_file * seq,void * v)1658ebd173beSTheodore Ts'o int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v)
165974cd15cdSZheng Liu {
1660ebd173beSTheodore Ts'o 	struct ext4_sb_info *sbi = EXT4_SB((struct super_block *) seq->private);
1661eb68d0e2SZheng Liu 	struct ext4_es_stats *es_stats = &sbi->s_es_stats;
1662eb68d0e2SZheng Liu 	struct ext4_inode_info *ei, *max = NULL;
1663eb68d0e2SZheng Liu 	unsigned int inode_cnt = 0;
1664eb68d0e2SZheng Liu 
1665eb68d0e2SZheng Liu 	if (v != SEQ_START_TOKEN)
1666eb68d0e2SZheng Liu 		return 0;
1667eb68d0e2SZheng Liu 
1668eb68d0e2SZheng Liu 	/* here we just find an inode that has the max nr. of objects */
1669edaa53caSZheng Liu 	spin_lock(&sbi->s_es_lock);
1670edaa53caSZheng Liu 	list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
1671eb68d0e2SZheng Liu 		inode_cnt++;
1672eb68d0e2SZheng Liu 		if (max && max->i_es_all_nr < ei->i_es_all_nr)
1673eb68d0e2SZheng Liu 			max = ei;
1674eb68d0e2SZheng Liu 		else if (!max)
1675eb68d0e2SZheng Liu 			max = ei;
1676eb68d0e2SZheng Liu 	}
1677edaa53caSZheng Liu 	spin_unlock(&sbi->s_es_lock);
1678eb68d0e2SZheng Liu 
1679eb68d0e2SZheng Liu 	seq_printf(seq, "stats:\n  %lld objects\n  %lld reclaimable objects\n",
1680eb68d0e2SZheng Liu 		   percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
1681edaa53caSZheng Liu 		   percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
1682520f897aSYang Guo 	seq_printf(seq, "  %lld/%lld cache hits/misses\n",
1683520f897aSYang Guo 		   percpu_counter_sum_positive(&es_stats->es_stats_cache_hits),
1684520f897aSYang Guo 		   percpu_counter_sum_positive(&es_stats->es_stats_cache_misses));
1685eb68d0e2SZheng Liu 	if (inode_cnt)
1686edaa53caSZheng Liu 		seq_printf(seq, "  %d inodes on list\n", inode_cnt);
1687eb68d0e2SZheng Liu 
1688eb68d0e2SZheng Liu 	seq_printf(seq, "average:\n  %llu us scan time\n",
1689eb68d0e2SZheng Liu 	    div_u64(es_stats->es_stats_scan_time, 1000));
1690eb68d0e2SZheng Liu 	seq_printf(seq, "  %lu shrunk objects\n", es_stats->es_stats_shrunk);
1691eb68d0e2SZheng Liu 	if (inode_cnt)
1692eb68d0e2SZheng Liu 		seq_printf(seq,
1693eb68d0e2SZheng Liu 		    "maximum:\n  %lu inode (%u objects, %u reclaimable)\n"
1694eb68d0e2SZheng Liu 		    "  %llu us max scan time\n",
1695edaa53caSZheng Liu 		    max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
1696eb68d0e2SZheng Liu 		    div_u64(es_stats->es_stats_max_scan_time, 1000));
1697eb68d0e2SZheng Liu 
1698eb68d0e2SZheng Liu 	return 0;
1699eb68d0e2SZheng Liu }
1700eb68d0e2SZheng Liu 
ext4_es_register_shrinker(struct ext4_sb_info * sbi)1701eb68d0e2SZheng Liu int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
1702eb68d0e2SZheng Liu {
1703eb68d0e2SZheng Liu 	int err;
1704eb68d0e2SZheng Liu 
1705624d0f1dSJan Kara 	/* Make sure we have enough bits for physical block number */
1706624d0f1dSJan Kara 	BUILD_BUG_ON(ES_SHIFT < 48);
1707edaa53caSZheng Liu 	INIT_LIST_HEAD(&sbi->s_es_list);
1708edaa53caSZheng Liu 	sbi->s_es_nr_inode = 0;
1709edaa53caSZheng Liu 	spin_lock_init(&sbi->s_es_lock);
1710eb68d0e2SZheng Liu 	sbi->s_es_stats.es_stats_shrunk = 0;
1711520f897aSYang Guo 	err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_hits, 0,
1712520f897aSYang Guo 				  GFP_KERNEL);
1713520f897aSYang Guo 	if (err)
1714520f897aSYang Guo 		return err;
1715520f897aSYang Guo 	err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_misses, 0,
1716520f897aSYang Guo 				  GFP_KERNEL);
1717520f897aSYang Guo 	if (err)
1718520f897aSYang Guo 		goto err1;
1719eb68d0e2SZheng Liu 	sbi->s_es_stats.es_stats_scan_time = 0;
1720eb68d0e2SZheng Liu 	sbi->s_es_stats.es_stats_max_scan_time = 0;
1721c2661b80SLinus Torvalds 	err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
1722eb68d0e2SZheng Liu 	if (err)
1723520f897aSYang Guo 		goto err2;
1724edaa53caSZheng Liu 	err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
1725eb68d0e2SZheng Liu 	if (err)
1726520f897aSYang Guo 		goto err3;
1727eb68d0e2SZheng Liu 
17284d09d75dSQi Zheng 	sbi->s_es_shrinker = shrinker_alloc(0, "ext4-es:%s", sbi->s_sb->s_id);
17294d09d75dSQi Zheng 	if (!sbi->s_es_shrinker) {
17304d09d75dSQi Zheng 		err = -ENOMEM;
1731520f897aSYang Guo 		goto err4;
17324d09d75dSQi Zheng 	}
17334d09d75dSQi Zheng 
17344d09d75dSQi Zheng 	sbi->s_es_shrinker->scan_objects = ext4_es_scan;
17354d09d75dSQi Zheng 	sbi->s_es_shrinker->count_objects = ext4_es_count;
17364d09d75dSQi Zheng 	sbi->s_es_shrinker->private_data = sbi;
17374d09d75dSQi Zheng 
17384d09d75dSQi Zheng 	shrinker_register(sbi->s_es_shrinker);
1739eb68d0e2SZheng Liu 
1740eb68d0e2SZheng Liu 	return 0;
1741520f897aSYang Guo err4:
1742edaa53caSZheng Liu 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
1743520f897aSYang Guo err3:
1744eb68d0e2SZheng Liu 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
1745520f897aSYang Guo err2:
1746520f897aSYang Guo 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
1747520f897aSYang Guo err1:
1748520f897aSYang Guo 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
1749eb68d0e2SZheng Liu 	return err;
175074cd15cdSZheng Liu }
175174cd15cdSZheng Liu 
ext4_es_unregister_shrinker(struct ext4_sb_info * sbi)1752d3922a77SZheng Liu void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
175374cd15cdSZheng Liu {
1754520f897aSYang Guo 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
1755520f897aSYang Guo 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
1756eb68d0e2SZheng Liu 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
1757edaa53caSZheng Liu 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
17584d09d75dSQi Zheng 	shrinker_free(sbi->s_es_shrinker);
175974cd15cdSZheng Liu }
176074cd15cdSZheng Liu 
1761dd475925SJan Kara /*
1762dd475925SJan Kara  * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at
1763dd475925SJan Kara  * most *nr_to_scan extents, update *nr_to_scan accordingly.
1764dd475925SJan Kara  *
1765dd475925SJan Kara  * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan.
1766dd475925SJan Kara  * Increment *nr_shrunk by the number of reclaimed extents. Also update
1767dd475925SJan Kara  * ei->i_es_shrink_lblk to where we should continue scanning.
1768dd475925SJan Kara  */
es_do_reclaim_extents(struct ext4_inode_info * ei,ext4_lblk_t end,int * nr_to_scan,int * nr_shrunk)1769dd475925SJan Kara static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
1770dd475925SJan Kara 				 int *nr_to_scan, int *nr_shrunk)
177174cd15cdSZheng Liu {
177274cd15cdSZheng Liu 	struct inode *inode = &ei->vfs_inode;
177374cd15cdSZheng Liu 	struct ext4_es_tree *tree = &ei->i_es_tree;
177474cd15cdSZheng Liu 	struct extent_status *es;
1775dd475925SJan Kara 	struct rb_node *node;
1776dd475925SJan Kara 
1777dd475925SJan Kara 	es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
1778dd475925SJan Kara 	if (!es)
1779dd475925SJan Kara 		goto out_wrap;
17808fcc3a58SEric Whitney 
1781dd475925SJan Kara 	while (*nr_to_scan > 0) {
1782dd475925SJan Kara 		if (es->es_lblk > end) {
1783dd475925SJan Kara 			ei->i_es_shrink_lblk = end + 1;
1784dd475925SJan Kara 			return 0;
1785dd475925SJan Kara 		}
1786dd475925SJan Kara 
1787dd475925SJan Kara 		(*nr_to_scan)--;
1788dd475925SJan Kara 		node = rb_next(&es->rb_node);
17899649eb18SBaokun Li 
17909649eb18SBaokun Li 		if (ext4_es_must_keep(es))
17912be12de9SJan Kara 			goto next;
17922be12de9SJan Kara 		if (ext4_es_is_referenced(es)) {
17932be12de9SJan Kara 			ext4_es_clear_referenced(es);
17942be12de9SJan Kara 			goto next;
17952be12de9SJan Kara 		}
17962be12de9SJan Kara 
1797dd475925SJan Kara 		rb_erase(&es->rb_node, &tree->root);
1798dd475925SJan Kara 		ext4_es_free_extent(inode, es);
1799dd475925SJan Kara 		(*nr_shrunk)++;
18002be12de9SJan Kara next:
1801dd475925SJan Kara 		if (!node)
1802dd475925SJan Kara 			goto out_wrap;
1803dd475925SJan Kara 		es = rb_entry(node, struct extent_status, rb_node);
1804dd475925SJan Kara 	}
1805dd475925SJan Kara 	ei->i_es_shrink_lblk = es->es_lblk;
1806dd475925SJan Kara 	return 1;
1807dd475925SJan Kara out_wrap:
1808dd475925SJan Kara 	ei->i_es_shrink_lblk = 0;
1809dd475925SJan Kara 	return 0;
1810dd475925SJan Kara }
1811dd475925SJan Kara 
es_reclaim_extents(struct ext4_inode_info * ei,int * nr_to_scan)1812dd475925SJan Kara static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
1813dd475925SJan Kara {
1814dd475925SJan Kara 	struct inode *inode = &ei->vfs_inode;
1815dd475925SJan Kara 	int nr_shrunk = 0;
1816dd475925SJan Kara 	ext4_lblk_t start = ei->i_es_shrink_lblk;
18177869a4a6STheodore Ts'o 	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
18187869a4a6STheodore Ts'o 				      DEFAULT_RATELIMIT_BURST);
181974cd15cdSZheng Liu 
1820edaa53caSZheng Liu 	if (ei->i_es_shk_nr == 0)
182174cd15cdSZheng Liu 		return 0;
182274cd15cdSZheng Liu 
18237869a4a6STheodore Ts'o 	if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
18247869a4a6STheodore Ts'o 	    __ratelimit(&_rs))
18257869a4a6STheodore Ts'o 		ext4_warning(inode->i_sb, "forced shrink of precached extents");
18267869a4a6STheodore Ts'o 
1827dd475925SJan Kara 	if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
1828dd475925SJan Kara 	    start != 0)
1829dd475925SJan Kara 		es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);
1830dd475925SJan Kara 
1831dd475925SJan Kara 	ei->i_es_tree.cache_es = NULL;
183274cd15cdSZheng Liu 	return nr_shrunk;
183374cd15cdSZheng Liu }
18341dc0aa46SEric Whitney 
1835b0c013e2STheodore Ts'o /*
1836b0c013e2STheodore Ts'o  * Called to support EXT4_IOC_CLEAR_ES_CACHE.  We can only remove
1837b0c013e2STheodore Ts'o  * discretionary entries from the extent status cache.  (Some entries
1838b0c013e2STheodore Ts'o  * must be present for proper operations.)
1839b0c013e2STheodore Ts'o  */
ext4_clear_inode_es(struct inode * inode)1840b0c013e2STheodore Ts'o void ext4_clear_inode_es(struct inode *inode)
1841b0c013e2STheodore Ts'o {
1842b0c013e2STheodore Ts'o 	struct ext4_inode_info *ei = EXT4_I(inode);
1843b0c013e2STheodore Ts'o 	struct extent_status *es;
1844b0c013e2STheodore Ts'o 	struct ext4_es_tree *tree;
1845b0c013e2STheodore Ts'o 	struct rb_node *node;
1846b0c013e2STheodore Ts'o 
1847b0c013e2STheodore Ts'o 	write_lock(&ei->i_es_lock);
1848b0c013e2STheodore Ts'o 	tree = &EXT4_I(inode)->i_es_tree;
1849b0c013e2STheodore Ts'o 	tree->cache_es = NULL;
1850b0c013e2STheodore Ts'o 	node = rb_first(&tree->root);
1851b0c013e2STheodore Ts'o 	while (node) {
1852b0c013e2STheodore Ts'o 		es = rb_entry(node, struct extent_status, rb_node);
1853b0c013e2STheodore Ts'o 		node = rb_next(node);
18549649eb18SBaokun Li 		if (!ext4_es_must_keep(es)) {
1855b0c013e2STheodore Ts'o 			rb_erase(&es->rb_node, &tree->root);
1856b0c013e2STheodore Ts'o 			ext4_es_free_extent(inode, es);
1857b0c013e2STheodore Ts'o 		}
1858b0c013e2STheodore Ts'o 	}
1859b0c013e2STheodore Ts'o 	ext4_clear_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
1860b0c013e2STheodore Ts'o 	write_unlock(&ei->i_es_lock);
1861b0c013e2STheodore Ts'o }
1862b0c013e2STheodore Ts'o 
18631dc0aa46SEric Whitney #ifdef ES_DEBUG__
ext4_print_pending_tree(struct inode * inode)18641dc0aa46SEric Whitney static void ext4_print_pending_tree(struct inode *inode)
18651dc0aa46SEric Whitney {
18661dc0aa46SEric Whitney 	struct ext4_pending_tree *tree;
18671dc0aa46SEric Whitney 	struct rb_node *node;
18681dc0aa46SEric Whitney 	struct pending_reservation *pr;
18691dc0aa46SEric Whitney 
18701dc0aa46SEric Whitney 	printk(KERN_DEBUG "pending reservations for inode %lu:", inode->i_ino);
18711dc0aa46SEric Whitney 	tree = &EXT4_I(inode)->i_pending_tree;
18721dc0aa46SEric Whitney 	node = rb_first(&tree->root);
18731dc0aa46SEric Whitney 	while (node) {
18741dc0aa46SEric Whitney 		pr = rb_entry(node, struct pending_reservation, rb_node);
18751dc0aa46SEric Whitney 		printk(KERN_DEBUG " %u", pr->lclu);
18761dc0aa46SEric Whitney 		node = rb_next(node);
18771dc0aa46SEric Whitney 	}
18781dc0aa46SEric Whitney 	printk(KERN_DEBUG "\n");
18791dc0aa46SEric Whitney }
18801dc0aa46SEric Whitney #else
18811dc0aa46SEric Whitney #define ext4_print_pending_tree(inode)
18821dc0aa46SEric Whitney #endif
18831dc0aa46SEric Whitney 
ext4_init_pending(void)18841dc0aa46SEric Whitney int __init ext4_init_pending(void)
18851dc0aa46SEric Whitney {
1886060f7739SJunChao Sun 	ext4_pending_cachep = KMEM_CACHE(pending_reservation, SLAB_RECLAIM_ACCOUNT);
18871dc0aa46SEric Whitney 	if (ext4_pending_cachep == NULL)
18881dc0aa46SEric Whitney 		return -ENOMEM;
18891dc0aa46SEric Whitney 	return 0;
18901dc0aa46SEric Whitney }
18911dc0aa46SEric Whitney 
ext4_exit_pending(void)18921dc0aa46SEric Whitney void ext4_exit_pending(void)
18931dc0aa46SEric Whitney {
18941dc0aa46SEric Whitney 	kmem_cache_destroy(ext4_pending_cachep);
18951dc0aa46SEric Whitney }
18961dc0aa46SEric Whitney 
ext4_init_pending_tree(struct ext4_pending_tree * tree)18971dc0aa46SEric Whitney void ext4_init_pending_tree(struct ext4_pending_tree *tree)
18981dc0aa46SEric Whitney {
18991dc0aa46SEric Whitney 	tree->root = RB_ROOT;
19001dc0aa46SEric Whitney }
19011dc0aa46SEric Whitney 
19021dc0aa46SEric Whitney /*
19031dc0aa46SEric Whitney  * __get_pending - retrieve a pointer to a pending reservation
19041dc0aa46SEric Whitney  *
19051dc0aa46SEric Whitney  * @inode - file containing the pending cluster reservation
19061dc0aa46SEric Whitney  * @lclu - logical cluster of interest
19071dc0aa46SEric Whitney  *
19081dc0aa46SEric Whitney  * Returns a pointer to a pending reservation if it's a member of
19091dc0aa46SEric Whitney  * the set, and NULL if not.  Must be called holding i_es_lock.
19101dc0aa46SEric Whitney  */
__get_pending(struct inode * inode,ext4_lblk_t lclu)19111dc0aa46SEric Whitney static struct pending_reservation *__get_pending(struct inode *inode,
19121dc0aa46SEric Whitney 						 ext4_lblk_t lclu)
19131dc0aa46SEric Whitney {
19141dc0aa46SEric Whitney 	struct ext4_pending_tree *tree;
19151dc0aa46SEric Whitney 	struct rb_node *node;
19161dc0aa46SEric Whitney 	struct pending_reservation *pr = NULL;
19171dc0aa46SEric Whitney 
19181dc0aa46SEric Whitney 	tree = &EXT4_I(inode)->i_pending_tree;
19191dc0aa46SEric Whitney 	node = (&tree->root)->rb_node;
19201dc0aa46SEric Whitney 
19211dc0aa46SEric Whitney 	while (node) {
19221dc0aa46SEric Whitney 		pr = rb_entry(node, struct pending_reservation, rb_node);
19231dc0aa46SEric Whitney 		if (lclu < pr->lclu)
19241dc0aa46SEric Whitney 			node = node->rb_left;
19251dc0aa46SEric Whitney 		else if (lclu > pr->lclu)
19261dc0aa46SEric Whitney 			node = node->rb_right;
19271dc0aa46SEric Whitney 		else if (lclu == pr->lclu)
19281dc0aa46SEric Whitney 			return pr;
19291dc0aa46SEric Whitney 	}
19301dc0aa46SEric Whitney 	return NULL;
19311dc0aa46SEric Whitney }
19321dc0aa46SEric Whitney 
19331dc0aa46SEric Whitney /*
19341dc0aa46SEric Whitney  * __insert_pending - adds a pending cluster reservation to the set of
19351dc0aa46SEric Whitney  *                    pending reservations
19361dc0aa46SEric Whitney  *
19371dc0aa46SEric Whitney  * @inode - file containing the cluster
19381dc0aa46SEric Whitney  * @lblk - logical block in the cluster to be added
1939*8e387c89SZhang Yi  * @prealloc - preallocated pending entry
19401dc0aa46SEric Whitney  *
19411dc0aa46SEric Whitney  * Returns 0 on successful insertion and -ENOMEM on failure.  If the
19421dc0aa46SEric Whitney  * pending reservation is already in the set, returns successfully.
19431dc0aa46SEric Whitney  */
__insert_pending(struct inode * inode,ext4_lblk_t lblk,struct pending_reservation ** prealloc)1944*8e387c89SZhang Yi static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
1945*8e387c89SZhang Yi 			    struct pending_reservation **prealloc)
19461dc0aa46SEric Whitney {
19471dc0aa46SEric Whitney 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
19481dc0aa46SEric Whitney 	struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
19491dc0aa46SEric Whitney 	struct rb_node **p = &tree->root.rb_node;
19501dc0aa46SEric Whitney 	struct rb_node *parent = NULL;
19511dc0aa46SEric Whitney 	struct pending_reservation *pr;
19521dc0aa46SEric Whitney 	ext4_lblk_t lclu;
19531dc0aa46SEric Whitney 	int ret = 0;
19541dc0aa46SEric Whitney 
19551dc0aa46SEric Whitney 	lclu = EXT4_B2C(sbi, lblk);
19561dc0aa46SEric Whitney 	/* search to find parent for insertion */
19571dc0aa46SEric Whitney 	while (*p) {
19581dc0aa46SEric Whitney 		parent = *p;
19591dc0aa46SEric Whitney 		pr = rb_entry(parent, struct pending_reservation, rb_node);
19601dc0aa46SEric Whitney 
19611dc0aa46SEric Whitney 		if (lclu < pr->lclu) {
19621dc0aa46SEric Whitney 			p = &(*p)->rb_left;
19631dc0aa46SEric Whitney 		} else if (lclu > pr->lclu) {
19641dc0aa46SEric Whitney 			p = &(*p)->rb_right;
19651dc0aa46SEric Whitney 		} else {
19661dc0aa46SEric Whitney 			/* pending reservation already inserted */
19671dc0aa46SEric Whitney 			goto out;
19681dc0aa46SEric Whitney 		}
19691dc0aa46SEric Whitney 	}
19701dc0aa46SEric Whitney 
1971*8e387c89SZhang Yi 	if (likely(*prealloc == NULL)) {
1972*8e387c89SZhang Yi 		pr = __alloc_pending(false);
1973*8e387c89SZhang Yi 		if (!pr) {
19741dc0aa46SEric Whitney 			ret = -ENOMEM;
19751dc0aa46SEric Whitney 			goto out;
19761dc0aa46SEric Whitney 		}
1977*8e387c89SZhang Yi 	} else {
1978*8e387c89SZhang Yi 		pr = *prealloc;
1979*8e387c89SZhang Yi 		*prealloc = NULL;
1980*8e387c89SZhang Yi 	}
19811dc0aa46SEric Whitney 	pr->lclu = lclu;
19821dc0aa46SEric Whitney 
19831dc0aa46SEric Whitney 	rb_link_node(&pr->rb_node, parent, p);
19841dc0aa46SEric Whitney 	rb_insert_color(&pr->rb_node, &tree->root);
19851dc0aa46SEric Whitney 
19861dc0aa46SEric Whitney out:
19871dc0aa46SEric Whitney 	return ret;
19881dc0aa46SEric Whitney }
19891dc0aa46SEric Whitney 
19901dc0aa46SEric Whitney /*
19911dc0aa46SEric Whitney  * __remove_pending - removes a pending cluster reservation from the set
19921dc0aa46SEric Whitney  *                    of pending reservations
19931dc0aa46SEric Whitney  *
19941dc0aa46SEric Whitney  * @inode - file containing the cluster
19951dc0aa46SEric Whitney  * @lblk - logical block in the pending cluster reservation to be removed
19961dc0aa46SEric Whitney  *
19971dc0aa46SEric Whitney  * Returns successfully if pending reservation is not a member of the set.
19981dc0aa46SEric Whitney  */
__remove_pending(struct inode * inode,ext4_lblk_t lblk)19991dc0aa46SEric Whitney static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
20001dc0aa46SEric Whitney {
20011dc0aa46SEric Whitney 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
20021dc0aa46SEric Whitney 	struct pending_reservation *pr;
20031dc0aa46SEric Whitney 	struct ext4_pending_tree *tree;
20041dc0aa46SEric Whitney 
20051dc0aa46SEric Whitney 	pr = __get_pending(inode, EXT4_B2C(sbi, lblk));
20061dc0aa46SEric Whitney 	if (pr != NULL) {
20071dc0aa46SEric Whitney 		tree = &EXT4_I(inode)->i_pending_tree;
20081dc0aa46SEric Whitney 		rb_erase(&pr->rb_node, &tree->root);
2009*8e387c89SZhang Yi 		__free_pending(pr);
20101dc0aa46SEric Whitney 	}
20111dc0aa46SEric Whitney }
20121dc0aa46SEric Whitney 
20131dc0aa46SEric Whitney /*
20141dc0aa46SEric Whitney  * ext4_remove_pending - removes a pending cluster reservation from the set
20151dc0aa46SEric Whitney  *                       of pending reservations
20161dc0aa46SEric Whitney  *
20171dc0aa46SEric Whitney  * @inode - file containing the cluster
20181dc0aa46SEric Whitney  * @lblk - logical block in the pending cluster reservation to be removed
20191dc0aa46SEric Whitney  *
20201dc0aa46SEric Whitney  * Locking for external use of __remove_pending.
20211dc0aa46SEric Whitney  */
ext4_remove_pending(struct inode * inode,ext4_lblk_t lblk)20221dc0aa46SEric Whitney void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk)
20231dc0aa46SEric Whitney {
20241dc0aa46SEric Whitney 	struct ext4_inode_info *ei = EXT4_I(inode);
20251dc0aa46SEric Whitney 
20261dc0aa46SEric Whitney 	write_lock(&ei->i_es_lock);
20271dc0aa46SEric Whitney 	__remove_pending(inode, lblk);
20281dc0aa46SEric Whitney 	write_unlock(&ei->i_es_lock);
20291dc0aa46SEric Whitney }
20301dc0aa46SEric Whitney 
20311dc0aa46SEric Whitney /*
20321dc0aa46SEric Whitney  * ext4_is_pending - determine whether a cluster has a pending reservation
20331dc0aa46SEric Whitney  *                   on it
20341dc0aa46SEric Whitney  *
20351dc0aa46SEric Whitney  * @inode - file containing the cluster
20361dc0aa46SEric Whitney  * @lblk - logical block in the cluster
20371dc0aa46SEric Whitney  *
20381dc0aa46SEric Whitney  * Returns true if there's a pending reservation for the cluster in the
20391dc0aa46SEric Whitney  * set of pending reservations, and false if not.
20401dc0aa46SEric Whitney  */
ext4_is_pending(struct inode * inode,ext4_lblk_t lblk)20411dc0aa46SEric Whitney bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk)
20421dc0aa46SEric Whitney {
20431dc0aa46SEric Whitney 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
20441dc0aa46SEric Whitney 	struct ext4_inode_info *ei = EXT4_I(inode);
20451dc0aa46SEric Whitney 	bool ret;
20461dc0aa46SEric Whitney 
20471dc0aa46SEric Whitney 	read_lock(&ei->i_es_lock);
20481dc0aa46SEric Whitney 	ret = (bool)(__get_pending(inode, EXT4_B2C(sbi, lblk)) != NULL);
20491dc0aa46SEric Whitney 	read_unlock(&ei->i_es_lock);
20501dc0aa46SEric Whitney 
20511dc0aa46SEric Whitney 	return ret;
20521dc0aa46SEric Whitney }
20530b02f4c0SEric Whitney 
20540b02f4c0SEric Whitney /*
20550b02f4c0SEric Whitney  * ext4_es_insert_delayed_block - adds a delayed block to the extents status
20560b02f4c0SEric Whitney  *                                tree, adding a pending reservation where
20570b02f4c0SEric Whitney  *                                needed
20580b02f4c0SEric Whitney  *
20590b02f4c0SEric Whitney  * @inode - file containing the newly added block
20600b02f4c0SEric Whitney  * @lblk - logical block to be added
20610b02f4c0SEric Whitney  * @allocated - indicates whether a physical cluster has been allocated for
20620b02f4c0SEric Whitney  *              the logical cluster that contains the block
20630b02f4c0SEric Whitney  */
ext4_es_insert_delayed_block(struct inode * inode,ext4_lblk_t lblk,bool allocated)20648782b020SBaokun Li void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
20650b02f4c0SEric Whitney 				  bool allocated)
20660b02f4c0SEric Whitney {
20670b02f4c0SEric Whitney 	struct extent_status newes;
2068*8e387c89SZhang Yi 	int err1 = 0, err2 = 0, err3 = 0;
20694a2d9844SBaokun Li 	struct extent_status *es1 = NULL;
20704a2d9844SBaokun Li 	struct extent_status *es2 = NULL;
2071*8e387c89SZhang Yi 	struct pending_reservation *pr = NULL;
20720b02f4c0SEric Whitney 
20738016e29fSHarshad Shirwadkar 	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
20748782b020SBaokun Li 		return;
20758016e29fSHarshad Shirwadkar 
20760b02f4c0SEric Whitney 	es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
20770b02f4c0SEric Whitney 		 lblk, inode->i_ino);
20780b02f4c0SEric Whitney 
20790b02f4c0SEric Whitney 	newes.es_lblk = lblk;
20800b02f4c0SEric Whitney 	newes.es_len = 1;
20810b02f4c0SEric Whitney 	ext4_es_store_pblock_status(&newes, ~0, EXTENT_STATUS_DELAYED);
20820b02f4c0SEric Whitney 	trace_ext4_es_insert_delayed_block(inode, &newes, allocated);
20830b02f4c0SEric Whitney 
20840b02f4c0SEric Whitney 	ext4_es_insert_extent_check(inode, &newes);
20850b02f4c0SEric Whitney 
20864a2d9844SBaokun Li retry:
20874a2d9844SBaokun Li 	if (err1 && !es1)
20884a2d9844SBaokun Li 		es1 = __es_alloc_extent(true);
20894a2d9844SBaokun Li 	if ((err1 || err2) && !es2)
20904a2d9844SBaokun Li 		es2 = __es_alloc_extent(true);
2091*8e387c89SZhang Yi 	if ((err1 || err2 || err3) && allocated && !pr)
2092*8e387c89SZhang Yi 		pr = __alloc_pending(true);
20930b02f4c0SEric Whitney 	write_lock(&EXT4_I(inode)->i_es_lock);
20940b02f4c0SEric Whitney 
20954a2d9844SBaokun Li 	err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
20964a2d9844SBaokun Li 	if (err1 != 0)
20970b02f4c0SEric Whitney 		goto error;
2098768d612fSBaokun Li 	/* Free preallocated extent if it didn't get used. */
2099768d612fSBaokun Li 	if (es1) {
2100768d612fSBaokun Li 		if (!es1->es_len)
2101768d612fSBaokun Li 			__es_free_extent(es1);
2102768d612fSBaokun Li 		es1 = NULL;
2103768d612fSBaokun Li 	}
21044a2d9844SBaokun Li 
21054a2d9844SBaokun Li 	err2 = __es_insert_extent(inode, &newes, es2);
21064a2d9844SBaokun Li 	if (err2 != 0)
21070b02f4c0SEric Whitney 		goto error;
2108768d612fSBaokun Li 	/* Free preallocated extent if it didn't get used. */
2109768d612fSBaokun Li 	if (es2) {
2110768d612fSBaokun Li 		if (!es2->es_len)
2111768d612fSBaokun Li 			__es_free_extent(es2);
2112768d612fSBaokun Li 		es2 = NULL;
2113768d612fSBaokun Li 	}
21140b02f4c0SEric Whitney 
2115*8e387c89SZhang Yi 	if (allocated) {
2116*8e387c89SZhang Yi 		err3 = __insert_pending(inode, lblk, &pr);
2117*8e387c89SZhang Yi 		if (err3 != 0)
2118*8e387c89SZhang Yi 			goto error;
2119*8e387c89SZhang Yi 		if (pr) {
2120*8e387c89SZhang Yi 			__free_pending(pr);
2121*8e387c89SZhang Yi 			pr = NULL;
2122*8e387c89SZhang Yi 		}
2123*8e387c89SZhang Yi 	}
21240b02f4c0SEric Whitney error:
21250b02f4c0SEric Whitney 	write_unlock(&EXT4_I(inode)->i_es_lock);
2126*8e387c89SZhang Yi 	if (err1 || err2 || err3)
21274a2d9844SBaokun Li 		goto retry;
21280b02f4c0SEric Whitney 
21290b02f4c0SEric Whitney 	ext4_es_print_tree(inode);
21300b02f4c0SEric Whitney 	ext4_print_pending_tree(inode);
21318782b020SBaokun Li 	return;
21320b02f4c0SEric Whitney }
2133b6bf9171SEric Whitney 
2134b6bf9171SEric Whitney /*
2135b6bf9171SEric Whitney  * __es_delayed_clu - count number of clusters containing blocks that
2136b6bf9171SEric Whitney  *                    are delayed only
2137b6bf9171SEric Whitney  *
2138b6bf9171SEric Whitney  * @inode - file containing block range
2139b6bf9171SEric Whitney  * @start - logical block defining start of range
2140b6bf9171SEric Whitney  * @end - logical block defining end of range
2141b6bf9171SEric Whitney  *
2142b6bf9171SEric Whitney  * Returns the number of clusters containing only delayed (not delayed
2143b6bf9171SEric Whitney  * and unwritten) blocks in the range specified by @start and @end.  Any
2144b6bf9171SEric Whitney  * cluster or part of a cluster within the range and containing a delayed
2145b6bf9171SEric Whitney  * and not unwritten block within the range is counted as a whole cluster.
2146b6bf9171SEric Whitney  */
__es_delayed_clu(struct inode * inode,ext4_lblk_t start,ext4_lblk_t end)2147b6bf9171SEric Whitney static unsigned int __es_delayed_clu(struct inode *inode, ext4_lblk_t start,
2148b6bf9171SEric Whitney 				     ext4_lblk_t end)
2149b6bf9171SEric Whitney {
2150b6bf9171SEric Whitney 	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
2151b6bf9171SEric Whitney 	struct extent_status *es;
2152b6bf9171SEric Whitney 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2153b6bf9171SEric Whitney 	struct rb_node *node;
2154b6bf9171SEric Whitney 	ext4_lblk_t first_lclu, last_lclu;
2155b6bf9171SEric Whitney 	unsigned long long last_counted_lclu;
2156b6bf9171SEric Whitney 	unsigned int n = 0;
2157b6bf9171SEric Whitney 
2158b6bf9171SEric Whitney 	/* guaranteed to be unequal to any ext4_lblk_t value */
2159b6bf9171SEric Whitney 	last_counted_lclu = ~0ULL;
2160b6bf9171SEric Whitney 
2161b6bf9171SEric Whitney 	es = __es_tree_search(&tree->root, start);
2162b6bf9171SEric Whitney 
2163b6bf9171SEric Whitney 	while (es && (es->es_lblk <= end)) {
2164b6bf9171SEric Whitney 		if (ext4_es_is_delonly(es)) {
2165b6bf9171SEric Whitney 			if (es->es_lblk <= start)
2166b6bf9171SEric Whitney 				first_lclu = EXT4_B2C(sbi, start);
2167b6bf9171SEric Whitney 			else
2168b6bf9171SEric Whitney 				first_lclu = EXT4_B2C(sbi, es->es_lblk);
2169b6bf9171SEric Whitney 
2170b6bf9171SEric Whitney 			if (ext4_es_end(es) >= end)
2171b6bf9171SEric Whitney 				last_lclu = EXT4_B2C(sbi, end);
2172b6bf9171SEric Whitney 			else
2173b6bf9171SEric Whitney 				last_lclu = EXT4_B2C(sbi, ext4_es_end(es));
2174b6bf9171SEric Whitney 
2175b6bf9171SEric Whitney 			if (first_lclu == last_counted_lclu)
2176b6bf9171SEric Whitney 				n += last_lclu - first_lclu;
2177b6bf9171SEric Whitney 			else
2178b6bf9171SEric Whitney 				n += last_lclu - first_lclu + 1;
2179b6bf9171SEric Whitney 			last_counted_lclu = last_lclu;
2180b6bf9171SEric Whitney 		}
2181b6bf9171SEric Whitney 		node = rb_next(&es->rb_node);
2182b6bf9171SEric Whitney 		if (!node)
2183b6bf9171SEric Whitney 			break;
2184b6bf9171SEric Whitney 		es = rb_entry(node, struct extent_status, rb_node);
2185b6bf9171SEric Whitney 	}
2186b6bf9171SEric Whitney 
2187b6bf9171SEric Whitney 	return n;
2188b6bf9171SEric Whitney }
2189b6bf9171SEric Whitney 
2190b6bf9171SEric Whitney /*
2191b6bf9171SEric Whitney  * ext4_es_delayed_clu - count number of clusters containing blocks that
2192b6bf9171SEric Whitney  *                       are both delayed and unwritten
2193b6bf9171SEric Whitney  *
2194b6bf9171SEric Whitney  * @inode - file containing block range
2195b6bf9171SEric Whitney  * @lblk - logical block defining start of range
2196b6bf9171SEric Whitney  * @len - number of blocks in range
2197b6bf9171SEric Whitney  *
2198b6bf9171SEric Whitney  * Locking for external use of __es_delayed_clu().
2199b6bf9171SEric Whitney  */
ext4_es_delayed_clu(struct inode * inode,ext4_lblk_t lblk,ext4_lblk_t len)2200b6bf9171SEric Whitney unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
2201b6bf9171SEric Whitney 				 ext4_lblk_t len)
2202b6bf9171SEric Whitney {
2203b6bf9171SEric Whitney 	struct ext4_inode_info *ei = EXT4_I(inode);
2204b6bf9171SEric Whitney 	ext4_lblk_t end;
2205b6bf9171SEric Whitney 	unsigned int n;
2206b6bf9171SEric Whitney 
2207b6bf9171SEric Whitney 	if (len == 0)
2208b6bf9171SEric Whitney 		return 0;
2209b6bf9171SEric Whitney 
2210b6bf9171SEric Whitney 	end = lblk + len - 1;
2211b6bf9171SEric Whitney 	WARN_ON(end < lblk);
2212b6bf9171SEric Whitney 
2213b6bf9171SEric Whitney 	read_lock(&ei->i_es_lock);
2214b6bf9171SEric Whitney 
2215b6bf9171SEric Whitney 	n = __es_delayed_clu(inode, lblk, end);
2216b6bf9171SEric Whitney 
2217b6bf9171SEric Whitney 	read_unlock(&ei->i_es_lock);
2218b6bf9171SEric Whitney 
2219b6bf9171SEric Whitney 	return n;
2220b6bf9171SEric Whitney }
2221b6bf9171SEric Whitney 
2222b6bf9171SEric Whitney /*
2223b6bf9171SEric Whitney  * __revise_pending - makes, cancels, or leaves unchanged pending cluster
2224b6bf9171SEric Whitney  *                    reservations for a specified block range depending
2225b6bf9171SEric Whitney  *                    upon the presence or absence of delayed blocks
2226b6bf9171SEric Whitney  *                    outside the range within clusters at the ends of the
2227b6bf9171SEric Whitney  *                    range
2228b6bf9171SEric Whitney  *
2229b6bf9171SEric Whitney  * @inode - file containing the range
2230b6bf9171SEric Whitney  * @lblk - logical block defining the start of range
2231b6bf9171SEric Whitney  * @len  - length of range in blocks
2232*8e387c89SZhang Yi  * @prealloc - preallocated pending entry
2233b6bf9171SEric Whitney  *
2234b6bf9171SEric Whitney  * Used after a newly allocated extent is added to the extents status tree.
2235b6bf9171SEric Whitney  * Requires that the extents in the range have either written or unwritten
2236b6bf9171SEric Whitney  * status.  Must be called while holding i_es_lock.
2237b6bf9171SEric Whitney  */
__revise_pending(struct inode * inode,ext4_lblk_t lblk,ext4_lblk_t len,struct pending_reservation ** prealloc)2238*8e387c89SZhang Yi static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
2239*8e387c89SZhang Yi 			    ext4_lblk_t len,
2240*8e387c89SZhang Yi 			    struct pending_reservation **prealloc)
2241b6bf9171SEric Whitney {
2242b6bf9171SEric Whitney 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2243b6bf9171SEric Whitney 	ext4_lblk_t end = lblk + len - 1;
2244b6bf9171SEric Whitney 	ext4_lblk_t first, last;
2245b6bf9171SEric Whitney 	bool f_del = false, l_del = false;
2246*8e387c89SZhang Yi 	int ret = 0;
2247b6bf9171SEric Whitney 
2248b6bf9171SEric Whitney 	if (len == 0)
2249*8e387c89SZhang Yi 		return 0;
2250b6bf9171SEric Whitney 
2251b6bf9171SEric Whitney 	/*
2252b6bf9171SEric Whitney 	 * Two cases - block range within single cluster and block range
2253b6bf9171SEric Whitney 	 * spanning two or more clusters.  Note that a cluster belonging
2254b6bf9171SEric Whitney 	 * to a range starting and/or ending on a cluster boundary is treated
2255b6bf9171SEric Whitney 	 * as if it does not contain a delayed extent.  The new range may
2256b6bf9171SEric Whitney 	 * have allocated space for previously delayed blocks out to the
2257b6bf9171SEric Whitney 	 * cluster boundary, requiring that any pre-existing pending
2258b6bf9171SEric Whitney 	 * reservation be canceled.  Because this code only looks at blocks
2259b6bf9171SEric Whitney 	 * outside the range, it should revise pending reservations
2260b6bf9171SEric Whitney 	 * correctly even if the extent represented by the range can't be
2261b6bf9171SEric Whitney 	 * inserted in the extents status tree due to ENOSPC.
2262b6bf9171SEric Whitney 	 */
2263b6bf9171SEric Whitney 
2264b6bf9171SEric Whitney 	if (EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) {
2265b6bf9171SEric Whitney 		first = EXT4_LBLK_CMASK(sbi, lblk);
2266b6bf9171SEric Whitney 		if (first != lblk)
2267b6bf9171SEric Whitney 			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
2268b6bf9171SEric Whitney 						first, lblk - 1);
2269b6bf9171SEric Whitney 		if (f_del) {
2270*8e387c89SZhang Yi 			ret = __insert_pending(inode, first, prealloc);
2271*8e387c89SZhang Yi 			if (ret < 0)
2272*8e387c89SZhang Yi 				goto out;
2273b6bf9171SEric Whitney 		} else {
2274b6bf9171SEric Whitney 			last = EXT4_LBLK_CMASK(sbi, end) +
2275b6bf9171SEric Whitney 			       sbi->s_cluster_ratio - 1;
2276b6bf9171SEric Whitney 			if (last != end)
2277b6bf9171SEric Whitney 				l_del = __es_scan_range(inode,
2278b6bf9171SEric Whitney 							&ext4_es_is_delonly,
2279b6bf9171SEric Whitney 							end + 1, last);
2280*8e387c89SZhang Yi 			if (l_del) {
2281*8e387c89SZhang Yi 				ret = __insert_pending(inode, last, prealloc);
2282*8e387c89SZhang Yi 				if (ret < 0)
2283*8e387c89SZhang Yi 					goto out;
2284*8e387c89SZhang Yi 			} else
2285b6bf9171SEric Whitney 				__remove_pending(inode, last);
2286b6bf9171SEric Whitney 		}
2287b6bf9171SEric Whitney 	} else {
2288b6bf9171SEric Whitney 		first = EXT4_LBLK_CMASK(sbi, lblk);
2289b6bf9171SEric Whitney 		if (first != lblk)
2290b6bf9171SEric Whitney 			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
2291b6bf9171SEric Whitney 						first, lblk - 1);
2292*8e387c89SZhang Yi 		if (f_del) {
2293*8e387c89SZhang Yi 			ret = __insert_pending(inode, first, prealloc);
2294*8e387c89SZhang Yi 			if (ret < 0)
2295*8e387c89SZhang Yi 				goto out;
2296*8e387c89SZhang Yi 		} else
2297b6bf9171SEric Whitney 			__remove_pending(inode, first);
2298b6bf9171SEric Whitney 
2299b6bf9171SEric Whitney 		last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
2300b6bf9171SEric Whitney 		if (last != end)
2301b6bf9171SEric Whitney 			l_del = __es_scan_range(inode, &ext4_es_is_delonly,
2302b6bf9171SEric Whitney 						end + 1, last);
2303*8e387c89SZhang Yi 		if (l_del) {
2304*8e387c89SZhang Yi 			ret = __insert_pending(inode, last, prealloc);
2305*8e387c89SZhang Yi 			if (ret < 0)
2306*8e387c89SZhang Yi 				goto out;
2307*8e387c89SZhang Yi 		} else
2308b6bf9171SEric Whitney 			__remove_pending(inode, last);
2309b6bf9171SEric Whitney 	}
2310*8e387c89SZhang Yi out:
2311*8e387c89SZhang Yi 	return ret;
2312b6bf9171SEric Whitney }
2313