// SPDX-License-Identifier: GPL-2.0-or-later
#include "cache.h"
#include "backing_dev.h"
#include "cache_dev.h"
#include "dm_pcache.h"

struct pcache_cache_kset_onmedia pcache_empty_kset = { 0 };

void cache_key_init(struct pcache_cache_tree *cache_tree, struct pcache_cache_key *key)
{
	kref_init(&key->ref);
	key->cache_tree = cache_tree;
	INIT_LIST_HEAD(&key->list_node);
	RB_CLEAR_NODE(&key->rb_node);
}

struct pcache_cache_key *cache_key_alloc(struct pcache_cache_tree *cache_tree, gfp_t gfp_mask)
{
	struct pcache_cache_key *key;

	key = mempool_alloc(&cache_tree->key_pool, gfp_mask);
	if (!key)
		return NULL;

	memset(key, 0, sizeof(struct pcache_cache_key));
	cache_key_init(cache_tree, key);

	return key;
}

/**
 * cache_key_get - Increment the reference count of a cache key.
 * @key: Pointer to the pcache_cache_key structure.
 *
 * This function increments the reference count of the specified cache key,
 * ensuring that it is not freed while still in use.
 */
void cache_key_get(struct pcache_cache_key *key)
{
	kref_get(&key->ref);
}

/**
 * cache_key_destroy - Free a cache key structure when its reference count drops to zero.
 * @ref: Pointer to the kref structure.
 *
 * This function is called when the reference count of the cache key reaches zero.
 * It returns the cache key to the mempool it was allocated from.
 */
static void cache_key_destroy(struct kref *ref)
{
	struct pcache_cache_key *key = container_of(ref, struct pcache_cache_key, ref);
	struct pcache_cache_tree *cache_tree = key->cache_tree;

	mempool_free(key, &cache_tree->key_pool);
}

void cache_key_put(struct pcache_cache_key *key)
{
	kref_put(&key->ref, cache_key_destroy);
}
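
/*
 * Reference counting overview (illustrative sketch, not driver logic): a key
 * is born with one reference from cache_key_alloc(), callers that keep it
 * across other operations take an extra reference with cache_key_get(), and
 * every reference is dropped with cache_key_put(). A hypothetical caller
 * could look like this:
 *
 *	struct pcache_cache_key *key;
 *
 *	key = cache_key_alloc(cache_tree, GFP_NOWAIT);
 *	if (!key)
 *		return -ENOMEM;
 *	cache_key_get(key);
 *	...
 *	cache_key_put(key);
 *	cache_key_put(key);
 *
 * The first put drops the extra reference; the second drops the initial
 * reference and returns the key to key_pool via cache_key_destroy().
 */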

void cache_pos_advance(struct pcache_cache_pos *pos, u32 len)
{
	/* Ensure enough space remains in the current segment */
	BUG_ON(cache_seg_remain(pos) < len);

	pos->seg_off += len;
}

static void cache_key_encode(struct pcache_cache *cache,
			     struct pcache_cache_key_onmedia *key_onmedia,
			     struct pcache_cache_key *key)
{
	key_onmedia->off = key->off;
	key_onmedia->len = key->len;

	key_onmedia->cache_seg_id = key->cache_pos.cache_seg->cache_seg_id;
	key_onmedia->cache_seg_off = key->cache_pos.seg_off;

	key_onmedia->seg_gen = key->seg_gen;
	key_onmedia->flags = key->flags;

	if (cache_data_crc_on(cache))
		key_onmedia->data_crc = cache_key_data_crc(key);
}

int cache_key_decode(struct pcache_cache *cache,
			struct pcache_cache_key_onmedia *key_onmedia,
			struct pcache_cache_key *key)
{
	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);

	key->off = key_onmedia->off;
	key->len = key_onmedia->len;

	key->cache_pos.cache_seg = &cache->segments[key_onmedia->cache_seg_id];
	key->cache_pos.seg_off = key_onmedia->cache_seg_off;

	key->seg_gen = key_onmedia->seg_gen;
	key->flags = key_onmedia->flags;

	if (cache_data_crc_on(cache) &&
			key_onmedia->data_crc != cache_key_data_crc(key)) {
		pcache_dev_err(pcache, "key: %llu:%u seg %u:%u data_crc error: %x, expected: %x\n",
				key->off, key->len, key->cache_pos.cache_seg->cache_seg_id,
				key->cache_pos.seg_off, cache_key_data_crc(key), key_onmedia->data_crc);
		return -EIO;
	}

	return 0;
}
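
/*
 * On-media round trip (descriptive note with an illustrative sketch):
 * cache_key_encode() copies a key's logical range, cache segment position,
 * segment generation and flags into its on-media form, optionally stamping
 * a CRC of the cached data; cache_key_decode() is the inverse and
 * re-verifies that CRC. A hypothetical round trip:
 *
 *	struct pcache_cache_key_onmedia km = { 0 };
 *
 *	cache_key_encode(cache, &km, key);
 *	...
 *	ret = cache_key_decode(cache, &km, key2);
 *	if (ret)
 *		return ret;
 *
 * A mismatch between the stored and recomputed data CRC makes the decode
 * fail with -EIO.
 */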

static void append_last_kset(struct pcache_cache *cache, u32 next_seg)
{
	struct pcache_cache_kset_onmedia kset_onmedia = { 0 };

	kset_onmedia.flags |= PCACHE_KSET_FLAGS_LAST;
	kset_onmedia.next_cache_seg_id = next_seg;
	kset_onmedia.magic = PCACHE_KSET_MAGIC;
	kset_onmedia.crc = cache_kset_crc(&kset_onmedia);

	memcpy_flushcache(get_key_head_addr(cache), &kset_onmedia, sizeof(struct pcache_cache_kset_onmedia));
	pmem_wmb();
	cache_pos_advance(&cache->key_head, sizeof(struct pcache_cache_kset_onmedia));
}

int cache_kset_close(struct pcache_cache *cache, struct pcache_cache_kset *kset)
{
	struct pcache_cache_kset_onmedia *kset_onmedia;
	u32 kset_onmedia_size;
	int ret;

	kset_onmedia = &kset->kset_onmedia;

	if (!kset_onmedia->key_num)
		return 0;

	kset_onmedia_size = struct_size(kset_onmedia, data, kset_onmedia->key_num);

	spin_lock(&cache->key_head_lock);
again:
	/* Reserve space for the last kset */
	if (cache_seg_remain(&cache->key_head) < kset_onmedia_size + sizeof(struct pcache_cache_kset_onmedia)) {
		struct pcache_cache_segment *next_seg;

		next_seg = get_cache_segment(cache);
		if (!next_seg) {
			ret = -EBUSY;
			goto out;
		}

		/* clear outdated kset in next seg */
		memcpy_flushcache(next_seg->segment.data, &pcache_empty_kset,
					sizeof(struct pcache_cache_kset_onmedia));
		append_last_kset(cache, next_seg->cache_seg_id);
		cache->key_head.cache_seg = next_seg;
		cache->key_head.seg_off = 0;
		goto again;
	}

	kset_onmedia->magic = PCACHE_KSET_MAGIC;
	kset_onmedia->crc = cache_kset_crc(kset_onmedia);

	/* clear outdated kset after current kset */
	memcpy_flushcache(get_key_head_addr(cache) + kset_onmedia_size, &pcache_empty_kset,
				sizeof(struct pcache_cache_kset_onmedia));
	/* write current kset into segment */
	memcpy_flushcache(get_key_head_addr(cache), kset_onmedia, kset_onmedia_size);
	pmem_wmb();

	/* reset kset_onmedia */
	memset(kset_onmedia, 0, sizeof(struct pcache_cache_kset_onmedia));
	cache_pos_advance(&cache->key_head, kset_onmedia_size);

	ret = 0;
out:
	spin_unlock(&cache->key_head_lock);

	return ret;
}
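
/*
 * Space accounting in cache_kset_close() (descriptive note): the check at
 * the "again" label reserves room for the kset being written plus one
 * empty-sized kset header, so a "last kset" marker pointing at the next
 * segment can always be appended without overflowing the current segment.
 * As an illustrative example with key_num == 2:
 *
 *	kset_onmedia_size = struct_size(kset_onmedia, data, 2);
 *	needed = kset_onmedia_size + sizeof(struct pcache_cache_kset_onmedia);
 *
 * and the kset is written in place only when
 * cache_seg_remain(&cache->key_head) is at least "needed".
 */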

/**
 * cache_key_append - Append a cache key to the related kset.
 * @cache: Pointer to the pcache_cache structure.
 * @key: Pointer to the cache key structure to append.
 * @force_close: Close the current kset immediately if true.
 *
 * This function appends a cache key to the appropriate kset. If the kset
 * becomes full, or @force_close is set, the kset is closed and written to
 * media right away; otherwise a delayed flush work is queued to write it
 * out later.
 *
 * Returns 0 on success, or a negative error code on failure.
 */
int cache_key_append(struct pcache_cache *cache, struct pcache_cache_key *key, bool force_close)
{
	struct pcache_cache_kset *kset;
	struct pcache_cache_kset_onmedia *kset_onmedia;
	struct pcache_cache_key_onmedia *key_onmedia;
	u32 kset_id = get_kset_id(cache, key->off);
	int ret = 0;

	kset = get_kset(cache, kset_id);
	kset_onmedia = &kset->kset_onmedia;

	spin_lock(&kset->kset_lock);
	key_onmedia = &kset_onmedia->data[kset_onmedia->key_num];
	cache_key_encode(cache, key_onmedia, key);

	/* Check if the current kset has reached the maximum number of keys */
	if (++kset_onmedia->key_num == PCACHE_KSET_KEYS_MAX || force_close) {
		/* If full, close the kset */
		ret = cache_kset_close(cache, kset);
		if (ret) {
			kset_onmedia->key_num--;
			goto out;
		}
	} else {
		/* If not full, queue a delayed work to flush the kset */
		queue_delayed_work(cache_get_wq(cache), &kset->flush_work, 1 * HZ);
	}
out:
	spin_unlock(&kset->kset_lock);

	return ret;
}
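
/*
 * Typical append usage (hedged sketch, not a fixed API contract): callers
 * normally append keys in batches and let the delayed flush work persist
 * them, and pass force_close for a key that must be made persistent
 * immediately:
 *
 *	ret = cache_key_append(cache, key, false);
 *	...
 *	ret = cache_key_append(cache, last_key, true);
 */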

/**
 * cache_subtree_walk - Traverse a cache subtree.
 * @ctx: Pointer to the context structure for traversal.
 *
 * This function walks the subtree starting from ctx->start_node.
 * It calls the appropriate callback functions based on how the keys
 * in the subtree relate to the key in @ctx.
 *
 * Returns SUBTREE_WALK_RET_OK when the walk completes, or the code
 * returned by one of the callbacks (including walk_finally).
 */
int cache_subtree_walk(struct pcache_cache_subtree_walk_ctx *ctx)
{
	struct pcache_cache_key *key_tmp, *key;
	struct rb_node *node_tmp;
	int ret = SUBTREE_WALK_RET_OK;

	key = ctx->key;
	node_tmp = ctx->start_node;

	while (node_tmp) {
		if (ctx->walk_done && ctx->walk_done(ctx))
			break;

		key_tmp = CACHE_KEY(node_tmp);
		/*
		 * If key_tmp ends before the start of key, continue to the next node.
		 * |----------|
		 *              |=====|
		 */
		if (cache_key_lend(key_tmp) <= cache_key_lstart(key)) {
			if (ctx->after) {
				ret = ctx->after(key, key_tmp, ctx);
				if (ret)
					goto out;
			}
			goto next;
		}

		/*
		 * If key_tmp starts after the end of key, stop traversing.
		 *	  |--------|
		 * |====|
		 */
		if (cache_key_lstart(key_tmp) >= cache_key_lend(key)) {
			if (ctx->before) {
				ret = ctx->before(key, key_tmp, ctx);
				if (ret)
					goto out;
			}
			break;
		}

		/* Handle overlapping keys */
		if (cache_key_lstart(key_tmp) >= cache_key_lstart(key)) {
			/*
			 * If key_tmp encompasses key.
			 *     |----------------|	key_tmp
			 * |===========|		key
			 */
			if (cache_key_lend(key_tmp) >= cache_key_lend(key)) {
				if (ctx->overlap_tail) {
					ret = ctx->overlap_tail(key, key_tmp, ctx);
					if (ret)
						goto out;
				}
				break;
			}

			/*
			 * If key_tmp is contained within key.
			 *    |----|		key_tmp
			 * |==========|		key
			 */
			if (ctx->overlap_contain) {
				ret = ctx->overlap_contain(key, key_tmp, ctx);
				if (ret)
					goto out;
			}

			goto next;
		}

		/*
		 * If key_tmp starts before key and extends beyond the end of key.
		 * |-----------|	key_tmp
		 *   |====|		key
		 */
		if (cache_key_lend(key_tmp) > cache_key_lend(key)) {
			if (ctx->overlap_contained) {
				ret = ctx->overlap_contained(key, key_tmp, ctx);
				if (ret)
					goto out;
			}
			break;
		}

		/*
		 * If key_tmp starts before key and ends within key.
		 * |--------|		key_tmp
		 *   |==========|	key
		 */
		if (ctx->overlap_head) {
			ret = ctx->overlap_head(key, key_tmp, ctx);
			if (ret)
				goto out;
		}
next:
		node_tmp = rb_next(node_tmp);
	}

out:
	if (ctx->walk_finally)
		ret = ctx->walk_finally(ctx, ret);

	return ret;
}
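
/*
 * Minimal walk setup (illustrative sketch): callers fill a
 * pcache_cache_subtree_walk_ctx with the key of interest, the start node
 * returned by cache_subtree_search(), and only the callbacks they care
 * about; unset callbacks are simply skipped. For example:
 *
 *	struct pcache_cache_subtree_walk_ctx walk_ctx = { 0 };
 *
 *	walk_ctx.cache_tree = cache_tree;
 *	walk_ctx.start_node = prev_node;
 *	walk_ctx.key = key;
 *	walk_ctx.overlap_tail = fixup_overlap_tail;
 *	ret = cache_subtree_walk(&walk_ctx);
 */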

/**
 * cache_subtree_search - Search for a key in the cache subtree.
 * @cache_subtree: Pointer to the cache subtree structure.
 * @key: Pointer to the cache key to search for.
 * @parentp: Pointer to store the parent node of the found node.
 * @newp: Pointer to store the location where the new node should be inserted.
 * @delete_key_list: List to collect invalid keys for deletion.
 *
 * This function searches the cache subtree for a specific key and returns
 * the node that is the predecessor of the key, or the first node if the key
 * is less than all keys in the tree. If any invalid keys are found during
 * the search, they are added to the delete_key_list for later cleanup.
 *
 * Returns a pointer to the predecessor node.
 */
struct rb_node *cache_subtree_search(struct pcache_cache_subtree *cache_subtree, struct pcache_cache_key *key,
				  struct rb_node **parentp, struct rb_node ***newp,
				  struct list_head *delete_key_list)
{
	struct rb_node **new, *parent = NULL;
	struct pcache_cache_key *key_tmp;
	struct rb_node *prev_node = NULL;

	new = &(cache_subtree->root.rb_node);
	while (*new) {
		key_tmp = container_of(*new, struct pcache_cache_key, rb_node);
		if (cache_key_invalid(key_tmp))
			list_add(&key_tmp->list_node, delete_key_list);

		parent = *new;
		if (key_tmp->off >= key->off) {
			new = &((*new)->rb_left);
		} else {
			prev_node = *new;
			new = &((*new)->rb_right);
		}
	}

	if (!prev_node)
		prev_node = rb_first(&cache_subtree->root);

	if (parentp)
		*parentp = parent;

	if (newp)
		*newp = new;

	return prev_node;
}

static struct pcache_cache_key *get_pre_alloc_key(struct pcache_cache_subtree_walk_ctx *ctx)
{
	struct pcache_cache_key *key;

	if (ctx->pre_alloc_key) {
		key = ctx->pre_alloc_key;
		ctx->pre_alloc_key = NULL;

		return key;
	}

	return cache_key_alloc(ctx->cache_tree, GFP_NOWAIT);
}

/**
 * fixup_overlap_tail - Adjust the key when it overlaps at the tail.
 * @key: Pointer to the new cache key being inserted.
 * @key_tmp: Pointer to the existing key that overlaps.
 * @ctx: Pointer to the context for walking the cache tree.
 *
 * This function modifies the existing key (key_tmp) when there is an
 * overlap at the tail with the new key. If the modified key becomes
 * empty, it is deleted.
 */
static int fixup_overlap_tail(struct pcache_cache_key *key,
			       struct pcache_cache_key *key_tmp,
			       struct pcache_cache_subtree_walk_ctx *ctx)
{
	/*
	 *     |----------------|	key_tmp
	 * |===========|		key
	 */
	BUG_ON(cache_key_empty(key));
	if (cache_key_empty(key_tmp)) {
		cache_key_delete(key_tmp);
		return SUBTREE_WALK_RET_RESEARCH;
	}

	cache_key_cutfront(key_tmp, cache_key_lend(key) - cache_key_lstart(key_tmp));
	if (key_tmp->len == 0) {
		cache_key_delete(key_tmp);
		return SUBTREE_WALK_RET_RESEARCH;
	}

	return SUBTREE_WALK_RET_OK;
}

/**
 * fixup_overlap_contain - Handle case where new key completely contains an existing key.
 * @key: Pointer to the new cache key being inserted.
 * @key_tmp: Pointer to the existing key that is being contained.
 * @ctx: Pointer to the context for walking the cache tree.
 *
 * This function deletes the existing key (key_tmp) when the new key
 * completely contains it. It returns SUBTREE_WALK_RET_RESEARCH to indicate
 * that the tree structure may have changed, necessitating a re-insertion of
 * the new key.
 */
static int fixup_overlap_contain(struct pcache_cache_key *key,
				  struct pcache_cache_key *key_tmp,
				  struct pcache_cache_subtree_walk_ctx *ctx)
{
	/*
	 *    |----|			key_tmp
	 * |==========|			key
	 */
	BUG_ON(cache_key_empty(key));
	cache_key_delete(key_tmp);

	return SUBTREE_WALK_RET_RESEARCH;
}

/**
 * fixup_overlap_contained - Handle overlap when a new key is contained in an existing key.
 * @key: The new cache key being inserted.
 * @key_tmp: The existing cache key that overlaps with the new key.
 * @ctx: Context for the cache tree walk.
 *
 * This function adjusts the existing key if the new key is contained
 * within it. If the existing key is empty, it indicates a placeholder key
 * that was inserted during a miss read. This placeholder will later be
 * updated with real data from the backing_dev, making it no longer an empty key.
 *
 * If we delete a key or insert a key, the structure of the entire cache tree
 * may change, requiring a full re-search of the tree to find a new insertion
 * point.
 */
static int fixup_overlap_contained(struct pcache_cache_key *key,
	struct pcache_cache_key *key_tmp, struct pcache_cache_subtree_walk_ctx *ctx)
{
	struct pcache_cache_tree *cache_tree = ctx->cache_tree;

	/*
	 * |-----------|		key_tmp
	 *   |====|			key
	 */
	BUG_ON(cache_key_empty(key));
	if (cache_key_empty(key_tmp)) {
		/*
		 * If key_tmp is empty, don't split it; it's a placeholder key
		 * for miss reads that will be updated later.
		 */
		cache_key_cutback(key_tmp, cache_key_lend(key_tmp) - cache_key_lstart(key));
		if (key_tmp->len == 0) {
			cache_key_delete(key_tmp);
			return SUBTREE_WALK_RET_RESEARCH;
		}
	} else {
		struct pcache_cache_key *key_fixup;
		bool need_research = false;

		key_fixup = get_pre_alloc_key(ctx);
		if (!key_fixup)
			return SUBTREE_WALK_RET_NEED_KEY;

		cache_key_copy(key_fixup, key_tmp);

		/* Split key_tmp based on the new key's range */
		cache_key_cutback(key_tmp, cache_key_lend(key_tmp) - cache_key_lstart(key));
		if (key_tmp->len == 0) {
			cache_key_delete(key_tmp);
			need_research = true;
		}

		/* Create a new portion for key_fixup */
		cache_key_cutfront(key_fixup, cache_key_lend(key) - cache_key_lstart(key_tmp));
		if (key_fixup->len == 0) {
			cache_key_put(key_fixup);
		} else {
			/* Insert the new key into the cache */
			cache_key_insert(cache_tree, key_fixup, false);
			need_research = true;
		}

		if (need_research)
			return SUBTREE_WALK_RET_RESEARCH;
	}

	return SUBTREE_WALK_RET_OK;
}
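
/*
 * Worked example of the contained-overlap split (hypothetical numbers): if
 * key_tmp covers logical range [0, 100) and the new key covers [20, 60),
 * key_tmp is cut back by lend(key_tmp) - lstart(key) = 100 - 20 = 80,
 * leaving [0, 20), and the copied key_fixup is cut in front by
 * lend(key) - lstart(key_tmp) = 60 - 0 = 60, leaving [60, 100); the new key
 * then owns [20, 60). Either piece that ends up with a zero length is
 * dropped instead of being kept or re-inserted.
 */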

/**
 * fixup_overlap_head - Handle overlap when an existing key overlaps the head of the new key.
 * @key: The new cache key being inserted.
 * @key_tmp: The existing cache key that overlaps with the new key.
 * @ctx: Context for the cache tree walk.
 *
 * This function adjusts the existing key when its tail overlaps the head
 * of the new key. If the resulting key length is zero after the
 * adjustment, the key is deleted. This indicates that the key no longer
 * holds valid data and requires the tree to be re-searched for a new
 * insertion point.
 */
static int fixup_overlap_head(struct pcache_cache_key *key,
	struct pcache_cache_key *key_tmp, struct pcache_cache_subtree_walk_ctx *ctx)
{
	/*
	 * |--------|		key_tmp
	 *   |==========|	key
	 */
	BUG_ON(cache_key_empty(key));
	/* Adjust key_tmp by cutting back based on the new key's start */
	cache_key_cutback(key_tmp, cache_key_lend(key_tmp) - cache_key_lstart(key));
	if (key_tmp->len == 0) {
		/* If the adjusted key_tmp length is zero, delete it */
		cache_key_delete(key_tmp);
		return SUBTREE_WALK_RET_RESEARCH;
	}

	return SUBTREE_WALK_RET_OK;
}

/**
 * cache_key_insert - Insert a new cache key into the cache tree.
 * @cache_tree: Pointer to the cache_tree structure.
 * @key: The cache key to insert.
 * @fixup: Whether to fix up overlaps with existing keys before linking.
 *
 * This function searches for the appropriate location to insert
 * a new cache key into the cache tree. It handles key overlaps
 * and ensures any invalid keys are removed before insertion.
 */
void cache_key_insert(struct pcache_cache_tree *cache_tree, struct pcache_cache_key *key, bool fixup)
{
	struct pcache_cache *cache = cache_tree->cache;
	struct pcache_cache_subtree_walk_ctx walk_ctx = { 0 };
	struct rb_node **new, *parent = NULL;
	struct pcache_cache_subtree *cache_subtree;
	struct pcache_cache_key *key_tmp = NULL, *key_next;
	struct rb_node *prev_node = NULL;
	LIST_HEAD(delete_key_list);
	int ret;

	cache_subtree = get_subtree(cache_tree, key->off);
	key->cache_subtree = cache_subtree;
search:
	prev_node = cache_subtree_search(cache_subtree, key, &parent, &new, &delete_key_list);
	if (!list_empty(&delete_key_list)) {
		/* Remove invalid keys from the delete list */
		list_for_each_entry_safe(key_tmp, key_next, &delete_key_list, list_node) {
			list_del_init(&key_tmp->list_node);
			cache_key_delete(key_tmp);
		}
		goto search;
	}

	if (fixup) {
		/* Set up the context with the cache tree, start node, and new key */
		walk_ctx.cache_tree = cache_tree;
		walk_ctx.start_node = prev_node;
		walk_ctx.key = key;

		/* Assign overlap handling functions for different scenarios */
		walk_ctx.overlap_tail = fixup_overlap_tail;
		walk_ctx.overlap_head = fixup_overlap_head;
		walk_ctx.overlap_contain = fixup_overlap_contain;
		walk_ctx.overlap_contained = fixup_overlap_contained;

		ret = cache_subtree_walk(&walk_ctx);
		switch (ret) {
		case SUBTREE_WALK_RET_OK:
			break;
		case SUBTREE_WALK_RET_RESEARCH:
			goto search;
		case SUBTREE_WALK_RET_NEED_KEY:
			spin_unlock(&cache_subtree->tree_lock);
			pcache_dev_debug(CACHE_TO_PCACHE(cache), "allocate pre_alloc_key with GFP_NOIO");
			walk_ctx.pre_alloc_key = cache_key_alloc(cache_tree, GFP_NOIO);
			spin_lock(&cache_subtree->tree_lock);
			goto search;
		default:
			BUG();
		}
	}

	if (walk_ctx.pre_alloc_key)
		cache_key_put(walk_ctx.pre_alloc_key);

	/* Link and insert the new key into the red-black tree */
	rb_link_node(&key->rb_node, parent, new);
	rb_insert_color(&key->rb_node, &cache_subtree->root);
}
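
/*
 * Insertion usage (hedged sketch): callers are expected to hold the
 * subtree lock around the insert, the same way kset_replay() below does:
 *
 *	cache_subtree = get_subtree(&cache->req_key_tree, key->off);
 *	spin_lock(&cache_subtree->tree_lock);
 *	cache_key_insert(&cache->req_key_tree, key, true);
 *	spin_unlock(&cache_subtree->tree_lock);
 */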

/**
 * clean_fn - Cleanup function to remove invalid keys from the cache tree.
 * @work: Pointer to the work_struct associated with the cleanup.
 *
 * This function cleans up invalid keys from the cache tree in the background
 * after a cache segment has been invalidated during cache garbage collection.
 * It processes a maximum of PCACHE_CLEAN_KEYS_MAX keys per iteration and holds
 * the tree lock to ensure thread safety.
 */
void clean_fn(struct work_struct *work)
{
	struct pcache_cache *cache = container_of(work, struct pcache_cache, clean_work);
	struct pcache_cache_subtree *cache_subtree;
	struct rb_node *node;
	struct pcache_cache_key *key;
	int i, count;

	for (i = 0; i < cache->req_key_tree.n_subtrees; i++) {
		cache_subtree = &cache->req_key_tree.subtrees[i];

again:
		if (pcache_is_stopping(CACHE_TO_PCACHE(cache)))
			return;

		/* Delete up to PCACHE_CLEAN_KEYS_MAX keys in one iteration */
		count = 0;
		spin_lock(&cache_subtree->tree_lock);
		node = rb_first(&cache_subtree->root);
		while (node) {
			key = CACHE_KEY(node);
			node = rb_next(node);
			if (cache_key_invalid(key)) {
				count++;
				cache_key_delete(key);
			}

			if (count >= PCACHE_CLEAN_KEYS_MAX) {
				/* Unlock and pause before continuing cleanup */
				spin_unlock(&cache_subtree->tree_lock);
				usleep_range(1000, 2000);
				goto again;
			}
		}
		spin_unlock(&cache_subtree->tree_lock);
	}
}

/*
 * kset_flush_fn - Flush work for a cache kset.
 *
 * This function runs when the delayed flush work queued from
 * cache_key_append() fires, and closes the kset so that its keys are
 * written to media even if the kset never fills up.
 *
 * If cache_kset_close() detects that a new segment is required to store
 * the kset and there are no available segments, it will return an error.
 * In this scenario, the flush work is re-queued and retried later.
 */
void kset_flush_fn(struct work_struct *work)
{
	struct pcache_cache_kset *kset = container_of(work, struct pcache_cache_kset, flush_work.work);
	struct pcache_cache *cache = kset->cache;
	int ret;

	if (pcache_is_stopping(CACHE_TO_PCACHE(cache)))
		return;

	spin_lock(&kset->kset_lock);
	ret = cache_kset_close(cache, kset);
	spin_unlock(&kset->kset_lock);

	if (ret) {
		/* Failed to flush kset, schedule a retry. */
		queue_delayed_work(cache_get_wq(cache), &kset->flush_work, msecs_to_jiffies(100));
	}
}

static int kset_replay(struct pcache_cache *cache, struct pcache_cache_kset_onmedia *kset_onmedia)
{
	struct pcache_cache_key_onmedia *key_onmedia;
	struct pcache_cache_subtree *cache_subtree;
	struct pcache_cache_key *key;
	int ret;
	int i;

	for (i = 0; i < kset_onmedia->key_num; i++) {
		key_onmedia = &kset_onmedia->data[i];

		key = cache_key_alloc(&cache->req_key_tree, GFP_NOIO);
		ret = cache_key_decode(cache, key_onmedia, key);
		if (ret) {
			cache_key_put(key);
			goto err;
		}

		__set_bit(key->cache_pos.cache_seg->cache_seg_id, cache->seg_map);

		/*
		 * Take the segment reference before the key can be dropped,
		 * so the segment is not reached through a freed key below.
		 */
		cache_seg_get(key->cache_pos.cache_seg);

		/* Check if the segment generation is valid for insertion. */
		if (key->seg_gen < key->cache_pos.cache_seg->gen) {
			cache_key_put(key);
		} else {
			cache_subtree = get_subtree(&cache->req_key_tree, key->off);
			spin_lock(&cache_subtree->tree_lock);
			cache_key_insert(&cache->req_key_tree, key, true);
			spin_unlock(&cache_subtree->tree_lock);
		}
	}

	return 0;
err:
	return ret;
}

int cache_replay(struct pcache_cache *cache)
{
	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
	struct pcache_cache_pos pos_tail;
	struct pcache_cache_pos *pos;
	struct pcache_cache_kset_onmedia *kset_onmedia;
	u32 to_copy, count = 0;
	int ret = 0;

	kset_onmedia = kzalloc(PCACHE_KSET_ONMEDIA_SIZE_MAX, GFP_KERNEL);
	if (!kset_onmedia)
		return -ENOMEM;

	cache_pos_copy(&pos_tail, &cache->key_tail);
	pos = &pos_tail;

	/*
	 * During the cache replay stage nothing else accesses
	 * cache->seg_map, so we can set bits here without holding
	 * cache->seg_map_lock.
	 */
	__set_bit(pos->cache_seg->cache_seg_id, cache->seg_map);

	while (true) {
		to_copy = min(PCACHE_KSET_ONMEDIA_SIZE_MAX, PCACHE_SEG_SIZE - pos->seg_off);
		ret = copy_mc_to_kernel(kset_onmedia, cache_pos_addr(pos), to_copy);
		if (ret) {
			ret = -EIO;
			goto out;
		}

		if (kset_onmedia->magic != PCACHE_KSET_MAGIC ||
				kset_onmedia->crc != cache_kset_crc(kset_onmedia)) {
			break;
		}

		/* Process the last kset and prepare for the next segment. */
		if (kset_onmedia->flags & PCACHE_KSET_FLAGS_LAST) {
			struct pcache_cache_segment *next_seg;

			pcache_dev_debug(pcache, "last kset replay, next: %u\n", kset_onmedia->next_cache_seg_id);

			next_seg = &cache->segments[kset_onmedia->next_cache_seg_id];

			pos->cache_seg = next_seg;
			pos->seg_off = 0;

			__set_bit(pos->cache_seg->cache_seg_id, cache->seg_map);
			continue;
		}

		/* Replay the kset and check for errors. */
		ret = kset_replay(cache, kset_onmedia);
		if (ret)
			goto out;

		/* Advance the position after processing the kset. */
		cache_pos_advance(pos, get_kset_onmedia_size(kset_onmedia));
		if (++count > 512) {
			cond_resched();
			count = 0;
		}
	}

	/* Update the key_head position after replaying. */
	spin_lock(&cache->key_head_lock);
	cache_pos_copy(&cache->key_head, pos);
	spin_unlock(&cache->key_head_lock);
out:
	kfree(kset_onmedia);
	return ret;
}
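
/*
 * Replay flow (descriptive note): starting at key_tail, each kset is copied
 * out of the persistent segment with copy_mc_to_kernel(), validated by its
 * magic and CRC, and its keys are re-inserted into the request key tree; a
 * kset flagged PCACHE_KSET_FLAGS_LAST redirects the scan to the next
 * segment. The scan stops at the first kset that fails validation, and
 * key_head is reset to that position.
 */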

int cache_tree_init(struct pcache_cache *cache, struct pcache_cache_tree *cache_tree, u32 n_subtrees)
{
	int ret;
	u32 i;

	cache_tree->cache = cache;
	cache_tree->n_subtrees = n_subtrees;

	ret = mempool_init_slab_pool(&cache_tree->key_pool, 1024, key_cache);
	if (ret)
		goto err;

	/*
	 * Allocate and initialize the subtrees array.
	 * Each element is a cache subtree structure that contains
	 * an RB tree root and a spinlock protecting its contents.
	 */
	cache_tree->subtrees = kvcalloc(cache_tree->n_subtrees, sizeof(struct pcache_cache_subtree), GFP_KERNEL);
	if (!cache_tree->subtrees) {
		ret = -ENOMEM;
		goto key_pool_exit;
	}

	for (i = 0; i < cache_tree->n_subtrees; i++) {
		struct pcache_cache_subtree *cache_subtree = &cache_tree->subtrees[i];

		cache_subtree->root = RB_ROOT;
		spin_lock_init(&cache_subtree->tree_lock);
	}

	return 0;

key_pool_exit:
	mempool_exit(&cache_tree->key_pool);
err:
	return ret;
}
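
/*
 * Lifetime pairing (hedged sketch): a cache tree initialized with
 * cache_tree_init() is torn down with cache_tree_exit(), which clears all
 * remaining keys and releases the subtree array and key mempool:
 *
 *	ret = cache_tree_init(cache, &cache->req_key_tree, n_subtrees);
 *	if (ret)
 *		return ret;
 *	...
 *	cache_tree_exit(&cache->req_key_tree);
 */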

void cache_tree_clear(struct pcache_cache_tree *cache_tree)
{
	struct pcache_cache_subtree *cache_subtree;
	struct rb_node *node;
	struct pcache_cache_key *key;
	u32 i;

	for (i = 0; i < cache_tree->n_subtrees; i++) {
		cache_subtree = &cache_tree->subtrees[i];

		spin_lock(&cache_subtree->tree_lock);
		node = rb_first(&cache_subtree->root);
		while (node) {
			key = CACHE_KEY(node);
			node = rb_next(node);

			cache_key_delete(key);
		}
		spin_unlock(&cache_subtree->tree_lock);
	}
}

void cache_tree_exit(struct pcache_cache_tree *cache_tree)
{
	cache_tree_clear(cache_tree);
	kvfree(cache_tree->subtrees);
	mempool_exit(&cache_tree->key_pool);
}