xref: /linux/drivers/infiniband/core/frmr_pools.c (revision 4b0b946019e7376752456380b67e54eea2f10a7c)
1ce5df0b8SMichael Guralnik // SPDX-License-Identifier: GPL-2.0  OR Linux-OpenIB
2ce5df0b8SMichael Guralnik /*
3ce5df0b8SMichael Guralnik  * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4ce5df0b8SMichael Guralnik  */
5ce5df0b8SMichael Guralnik 
6ce5df0b8SMichael Guralnik #include <linux/slab.h>
7ce5df0b8SMichael Guralnik #include <linux/rbtree.h>
8ce5df0b8SMichael Guralnik #include <linux/sort.h>
9ce5df0b8SMichael Guralnik #include <linux/spinlock.h>
10ce5df0b8SMichael Guralnik #include <rdma/ib_verbs.h>
1184cb1dd0SMichael Guralnik #include <linux/timer.h>
12ce5df0b8SMichael Guralnik 
13ce5df0b8SMichael Guralnik #include "frmr_pools.h"
14ce5df0b8SMichael Guralnik 
1584cb1dd0SMichael Guralnik #define FRMR_POOLS_DEFAULT_AGING_PERIOD_SECS 60
1684cb1dd0SMichael Guralnik 
17ce5df0b8SMichael Guralnik static int push_handle_to_queue_locked(struct frmr_queue *queue, u32 handle)
18ce5df0b8SMichael Guralnik {
19ce5df0b8SMichael Guralnik 	u32 tmp = queue->ci % NUM_HANDLES_PER_PAGE;
20ce5df0b8SMichael Guralnik 	struct frmr_handles_page *page;
21ce5df0b8SMichael Guralnik 
22ce5df0b8SMichael Guralnik 	if (queue->ci >= queue->num_pages * NUM_HANDLES_PER_PAGE) {
23ce5df0b8SMichael Guralnik 		page = kzalloc_obj(*page, GFP_ATOMIC);
24ce5df0b8SMichael Guralnik 		if (!page)
25ce5df0b8SMichael Guralnik 			return -ENOMEM;
26ce5df0b8SMichael Guralnik 		queue->num_pages++;
27ce5df0b8SMichael Guralnik 		list_add_tail(&page->list, &queue->pages_list);
28ce5df0b8SMichael Guralnik 	} else {
29ce5df0b8SMichael Guralnik 		page = list_last_entry(&queue->pages_list,
30ce5df0b8SMichael Guralnik 				       struct frmr_handles_page, list);
31ce5df0b8SMichael Guralnik 	}
32ce5df0b8SMichael Guralnik 
33ce5df0b8SMichael Guralnik 	page->handles[tmp] = handle;
34ce5df0b8SMichael Guralnik 	queue->ci++;
35ce5df0b8SMichael Guralnik 	return 0;
36ce5df0b8SMichael Guralnik }
37ce5df0b8SMichael Guralnik 
38ce5df0b8SMichael Guralnik static u32 pop_handle_from_queue_locked(struct frmr_queue *queue)
39ce5df0b8SMichael Guralnik {
40ce5df0b8SMichael Guralnik 	u32 tmp = (queue->ci - 1) % NUM_HANDLES_PER_PAGE;
41ce5df0b8SMichael Guralnik 	struct frmr_handles_page *page;
42ce5df0b8SMichael Guralnik 	u32 handle;
43ce5df0b8SMichael Guralnik 
44ce5df0b8SMichael Guralnik 	page = list_last_entry(&queue->pages_list, struct frmr_handles_page,
45ce5df0b8SMichael Guralnik 			       list);
46ce5df0b8SMichael Guralnik 	handle = page->handles[tmp];
47ce5df0b8SMichael Guralnik 	queue->ci--;
48ce5df0b8SMichael Guralnik 
49ce5df0b8SMichael Guralnik 	if (!tmp) {
50ce5df0b8SMichael Guralnik 		list_del(&page->list);
51ce5df0b8SMichael Guralnik 		queue->num_pages--;
52ce5df0b8SMichael Guralnik 		kfree(page);
53ce5df0b8SMichael Guralnik 	}
54ce5df0b8SMichael Guralnik 
55ce5df0b8SMichael Guralnik 	return handle;
56ce5df0b8SMichael Guralnik }
57ce5df0b8SMichael Guralnik 
/*
 * Detach the oldest (first) page of handles from @queue.
 *
 * On success the caller owns *page (and must kfree() it after use) and
 * *count holds the number of valid handles it contains: a full
 * NUM_HANDLES_PER_PAGE unless the queue holds fewer than that in total,
 * i.e. this is the only/last page and it is partially filled.
 *
 * Takes and releases pool->lock internally.
 *
 * Returns true if a page was detached, false if the queue was empty.
 */
static bool pop_frmr_handles_page(struct ib_frmr_pool *pool,
				  struct frmr_queue *queue,
				  struct frmr_handles_page **page, u32 *count)
{
	spin_lock(&pool->lock);
	if (list_empty(&queue->pages_list)) {
		spin_unlock(&pool->lock);
		return false;
	}

	*page = list_first_entry(&queue->pages_list, struct frmr_handles_page,
				 list);
	list_del(&(*page)->list);
	queue->num_pages--;

	/* If this is the last page, count may be less than
	 * NUM_HANDLES_PER_PAGE.
	 */
	if (queue->ci >= NUM_HANDLES_PER_PAGE)
		*count = NUM_HANDLES_PER_PAGE;
	else
		*count = queue->ci;

	queue->ci -= *count;
	spin_unlock(&pool->lock);
	return true;
}
85ce5df0b8SMichael Guralnik 
8684cb1dd0SMichael Guralnik static void destroy_all_handles_in_queue(struct ib_device *device,
8784cb1dd0SMichael Guralnik 					 struct ib_frmr_pool *pool,
8884cb1dd0SMichael Guralnik 					 struct frmr_queue *queue)
89ce5df0b8SMichael Guralnik {
90ce5df0b8SMichael Guralnik 	struct ib_frmr_pools *pools = device->frmr_pools;
91ce5df0b8SMichael Guralnik 	struct frmr_handles_page *page;
92ce5df0b8SMichael Guralnik 	u32 count;
93ce5df0b8SMichael Guralnik 
9484cb1dd0SMichael Guralnik 	while (pop_frmr_handles_page(pool, queue, &page, &count)) {
95ce5df0b8SMichael Guralnik 		pools->pool_ops->destroy_frmrs(device, page->handles, count);
96ce5df0b8SMichael Guralnik 		kfree(page);
97ce5df0b8SMichael Guralnik 	}
9884cb1dd0SMichael Guralnik }
9984cb1dd0SMichael Guralnik 
/*
 * Aging step for a pool with a non-zero pinned_handles floor.
 *
 * Any handles beyond the pinned count are trimmed from the inactive
 * queue (oldest generation), and everything in the active queue is
 * demoted to the inactive queue so it becomes eligible for trimming on
 * the next aging period.
 *
 * Returns true if another aging pass should be scheduled (handles were
 * demoted this round, or the trim could not run due to allocation
 * failure), false if the pool is already at/below its pinned floor.
 */
static bool age_pinned_pool(struct ib_device *device, struct ib_frmr_pool *pool)
{
	struct ib_frmr_pools *pools = device->frmr_pools;
	u32 total, to_destroy, destroyed = 0;
	bool has_work = false;
	u32 *handles;
	u32 handle;

	spin_lock(&pool->lock);
	/* in_use handles count toward the pinned floor as well. */
	total = pool->queue.ci + pool->inactive_queue.ci + pool->in_use;
	if (total <= pool->pinned_handles) {
		spin_unlock(&pool->lock);
		return false;
	}

	to_destroy = total - pool->pinned_handles;

	/* GFP_ATOMIC: allocated while holding the pool spinlock. */
	handles = kcalloc(to_destroy, sizeof(*handles), GFP_ATOMIC);
	if (!handles) {
		/* Could not trim now; report work pending so we retry. */
		spin_unlock(&pool->lock);
		return true;
	}

	/* Destroy all excess handles in the inactive queue */
	while (pool->inactive_queue.ci && destroyed < to_destroy) {
		handles[destroyed++] = pop_handle_from_queue_locked(
			&pool->inactive_queue);
	}

	/* Move all handles from regular queue to inactive queue */
	while (pool->queue.ci) {
		handle = pop_handle_from_queue_locked(&pool->queue);
		/*
		 * NOTE(review): push can fail with -ENOMEM, in which case
		 * this handle is dropped without being destroyed — a
		 * potential HW-resource leak. Worth confirming/fixing
		 * upstream.
		 */
		push_handle_to_queue_locked(&pool->inactive_queue, handle);
		has_work = true;
	}

	spin_unlock(&pool->lock);

	/* Actual destruction happens with the lock dropped. */
	if (destroyed)
		pools->pool_ops->destroy_frmrs(device, handles, destroyed);
	kfree(handles);
	return has_work;
}
143020d189dSMichael Guralnik 
/*
 * Periodic two-generation aging of one pool.
 *
 * Non-pinned pools: handles that sat in the inactive queue for a whole
 * aging period are destroyed, then the active queue's pages are spliced
 * into the (now empty) inactive queue — anything unused for two periods
 * gets destroyed. Pinned pools take the age_pinned_pool() path, which
 * preserves a floor of pinned_handles.
 *
 * Reschedules itself only while there is something left to age.
 */
static void pool_aging_work(struct work_struct *work)
{
	struct ib_frmr_pool *pool = container_of(
		to_delayed_work(work), struct ib_frmr_pool, aging_work);
	struct ib_frmr_pools *pools = pool->device->frmr_pools;
	bool has_work = false;

	if (pool->pinned_handles) {
		has_work = age_pinned_pool(pool->device, pool);
		goto out;
	}

	/* Generation 2: destroy everything that aged a full period. */
	destroy_all_handles_in_queue(pool->device, pool, &pool->inactive_queue);

	/* Move all pages from regular queue to inactive queue */
	spin_lock(&pool->lock);
	if (pool->queue.ci > 0) {
		list_splice_tail_init(&pool->queue.pages_list,
				      &pool->inactive_queue.pages_list);
		/*
		 * Plain assignment is safe: the inactive queue was fully
		 * drained above, so its counters are zero here.
		 */
		pool->inactive_queue.num_pages = pool->queue.num_pages;
		pool->inactive_queue.ci = pool->queue.ci;

		pool->queue.num_pages = 0;
		pool->queue.ci = 0;
		has_work = true;
	}
	spin_unlock(&pool->lock);

out:
	/* Reschedule if there are handles to age in next aging period */
	if (has_work)
		queue_delayed_work(
			pools->aging_wq, &pool->aging_work,
			secs_to_jiffies(READ_ONCE(pools->aging_period_sec)));
}
17984cb1dd0SMichael Guralnik 
/*
 * Tear down one pool: stop its aging work, destroy every cached handle
 * in both generations, then free the pool itself.
 *
 * Caller must guarantee no handles are in use and no pop/push runs
 * concurrently (see ib_frmr_pools_cleanup()).
 */
static void destroy_frmr_pool(struct ib_device *device,
			      struct ib_frmr_pool *pool)
{
	/* Sync cancel so aging cannot touch the queues after this point. */
	cancel_delayed_work_sync(&pool->aging_work);
	destroy_all_handles_in_queue(device, pool, &pool->queue);
	destroy_all_handles_in_queue(device, pool, &pool->inactive_queue);

	kfree(pool);
}
189ce5df0b8SMichael Guralnik 
190ce5df0b8SMichael Guralnik /*
191ce5df0b8SMichael Guralnik  * Initialize the FRMR pools for a device.
192ce5df0b8SMichael Guralnik  *
193ce5df0b8SMichael Guralnik  * @device: The device to initialize the FRMR pools for.
194ce5df0b8SMichael Guralnik  * @pool_ops: The pool operations to use.
195ce5df0b8SMichael Guralnik  *
196ce5df0b8SMichael Guralnik  * Returns 0 on success, negative error code on failure.
197ce5df0b8SMichael Guralnik  */
198ce5df0b8SMichael Guralnik int ib_frmr_pools_init(struct ib_device *device,
199ce5df0b8SMichael Guralnik 		       const struct ib_frmr_pool_ops *pool_ops)
200ce5df0b8SMichael Guralnik {
201ce5df0b8SMichael Guralnik 	struct ib_frmr_pools *pools;
202ce5df0b8SMichael Guralnik 
203ce5df0b8SMichael Guralnik 	pools = kzalloc_obj(*pools);
204ce5df0b8SMichael Guralnik 	if (!pools)
205ce5df0b8SMichael Guralnik 		return -ENOMEM;
206ce5df0b8SMichael Guralnik 
207ce5df0b8SMichael Guralnik 	pools->rb_root = RB_ROOT;
208ce5df0b8SMichael Guralnik 	rwlock_init(&pools->rb_lock);
209ce5df0b8SMichael Guralnik 	pools->pool_ops = pool_ops;
21084cb1dd0SMichael Guralnik 	pools->aging_wq = create_singlethread_workqueue("frmr_aging_wq");
21184cb1dd0SMichael Guralnik 	if (!pools->aging_wq) {
21284cb1dd0SMichael Guralnik 		kfree(pools);
21384cb1dd0SMichael Guralnik 		return -ENOMEM;
21484cb1dd0SMichael Guralnik 	}
215ce5df0b8SMichael Guralnik 
216*d2ea675eSMichael Guralnik 	pools->aging_period_sec = FRMR_POOLS_DEFAULT_AGING_PERIOD_SECS;
217*d2ea675eSMichael Guralnik 
218ce5df0b8SMichael Guralnik 	device->frmr_pools = pools;
219ce5df0b8SMichael Guralnik 	return 0;
220ce5df0b8SMichael Guralnik }
221ce5df0b8SMichael Guralnik EXPORT_SYMBOL(ib_frmr_pools_init);
222ce5df0b8SMichael Guralnik 
/*
 * Clean up the FRMR pools for a device.
 *
 * @device: The device to clean up the FRMR pools for.
 *
 * Call cleanup only after all FRMR handles have been pushed back to the pool
 * and no other FRMR operations are allowed to run in parallel.
 * Ensuring this allows us to save synchronization overhead in pop and push
 * operations.
 *
 * Safe to call when pools were never initialized (no-op). The postorder
 * walk frees pools without rebalancing the tree, which is valid because
 * the whole tree is discarded afterwards.
 */
void ib_frmr_pools_cleanup(struct ib_device *device)
{
	struct ib_frmr_pools *pools = device->frmr_pools;
	struct ib_frmr_pool *pool, *next;

	if (!pools)
		return;

	/* No rb_erase needed: every node dies and the root is dropped. */
	rbtree_postorder_for_each_entry_safe(pool, next, &pools->rb_root, node)
		destroy_frmr_pool(device, pool);

	destroy_workqueue(pools->aging_wq);
	kfree(pools);
	device->frmr_pools = NULL;
}
EXPORT_SYMBOL(ib_frmr_pools_cleanup);
249ce5df0b8SMichael Guralnik 
250*d2ea675eSMichael Guralnik int ib_frmr_pools_set_aging_period(struct ib_device *device, u32 period_sec)
251*d2ea675eSMichael Guralnik {
252*d2ea675eSMichael Guralnik 	struct ib_frmr_pools *pools = device->frmr_pools;
253*d2ea675eSMichael Guralnik 	struct ib_frmr_pool *pool;
254*d2ea675eSMichael Guralnik 	struct rb_node *node;
255*d2ea675eSMichael Guralnik 
256*d2ea675eSMichael Guralnik 	if (!pools)
257*d2ea675eSMichael Guralnik 		return -EINVAL;
258*d2ea675eSMichael Guralnik 
259*d2ea675eSMichael Guralnik 	if (period_sec == 0)
260*d2ea675eSMichael Guralnik 		return -EINVAL;
261*d2ea675eSMichael Guralnik 
262*d2ea675eSMichael Guralnik 	WRITE_ONCE(pools->aging_period_sec, period_sec);
263*d2ea675eSMichael Guralnik 
264*d2ea675eSMichael Guralnik 	read_lock(&pools->rb_lock);
265*d2ea675eSMichael Guralnik 	for (node = rb_first(&pools->rb_root); node; node = rb_next(node)) {
266*d2ea675eSMichael Guralnik 		pool = rb_entry(node, struct ib_frmr_pool, node);
267*d2ea675eSMichael Guralnik 		mod_delayed_work(pools->aging_wq, &pool->aging_work,
268*d2ea675eSMichael Guralnik 				 secs_to_jiffies(period_sec));
269*d2ea675eSMichael Guralnik 	}
270*d2ea675eSMichael Guralnik 	read_unlock(&pools->rb_lock);
271*d2ea675eSMichael Guralnik 
272*d2ea675eSMichael Guralnik 	return 0;
273*d2ea675eSMichael Guralnik }
274*d2ea675eSMichael Guralnik 
/*
 * Order two pool keys for rbtree lookup/insert.
 *
 * Fields are compared lexicographically: ats, access_flags, vendor_key,
 * kernel_vendor_key, then num_dma_blocks. Returns <0/0/>0 like memcmp.
 *
 * NOTE(review): the num_dma_blocks leg is deliberately fuzzy (see below),
 * so this is not a strict total order; it relies on the rbtree callers
 * tolerating "ranges" of equal keys — confirm that insert and find always
 * use the same comparator so the tree stays consistent.
 */
static inline int compare_keys(struct ib_frmr_key *key1,
			       struct ib_frmr_key *key2)
{
	int res;

	res = cmp_int(key1->ats, key2->ats);
	if (res)
		return res;

	res = cmp_int(key1->access_flags, key2->access_flags);
	if (res)
		return res;

	res = cmp_int(key1->vendor_key, key2->vendor_key);
	if (res)
		return res;

	res = cmp_int(key1->kernel_vendor_key, key2->kernel_vendor_key);
	if (res)
		return res;

	/*
	 * allow using handles that support more DMA blocks, up to twice the
	 * requested number
	 */
	res = cmp_int(key1->num_dma_blocks, key2->num_dma_blocks);
	if (res > 0) {
		/* key1 larger but still < 2 * key2: treat as a match. */
		if (key1->num_dma_blocks - key2->num_dma_blocks <
		    key2->num_dma_blocks)
			return 0;
	}

	return res;
}
309ce5df0b8SMichael Guralnik 
310ce5df0b8SMichael Guralnik static int frmr_pool_cmp_find(const void *key, const struct rb_node *node)
311ce5df0b8SMichael Guralnik {
312ce5df0b8SMichael Guralnik 	struct ib_frmr_pool *pool = rb_entry(node, struct ib_frmr_pool, node);
313ce5df0b8SMichael Guralnik 
314ce5df0b8SMichael Guralnik 	return compare_keys(&pool->key, (struct ib_frmr_key *)key);
315ce5df0b8SMichael Guralnik }
316ce5df0b8SMichael Guralnik 
317ce5df0b8SMichael Guralnik static int frmr_pool_cmp_add(struct rb_node *new, const struct rb_node *node)
318ce5df0b8SMichael Guralnik {
319ce5df0b8SMichael Guralnik 	struct ib_frmr_pool *new_pool =
320ce5df0b8SMichael Guralnik 		rb_entry(new, struct ib_frmr_pool, node);
321ce5df0b8SMichael Guralnik 	struct ib_frmr_pool *pool = rb_entry(node, struct ib_frmr_pool, node);
322ce5df0b8SMichael Guralnik 
323ce5df0b8SMichael Guralnik 	return compare_keys(&pool->key, &new_pool->key);
324ce5df0b8SMichael Guralnik }
325ce5df0b8SMichael Guralnik 
326ce5df0b8SMichael Guralnik static struct ib_frmr_pool *ib_frmr_pool_find(struct ib_frmr_pools *pools,
327ce5df0b8SMichael Guralnik 					      struct ib_frmr_key *key)
328ce5df0b8SMichael Guralnik {
329ce5df0b8SMichael Guralnik 	struct ib_frmr_pool *pool;
330ce5df0b8SMichael Guralnik 	struct rb_node *node;
331ce5df0b8SMichael Guralnik 
332ce5df0b8SMichael Guralnik 	/* find operation is done under read lock for performance reasons.
333ce5df0b8SMichael Guralnik 	 * The case of threads failing to find the same pool and creating it
334ce5df0b8SMichael Guralnik 	 * is handled by the create_frmr_pool function.
335ce5df0b8SMichael Guralnik 	 */
336ce5df0b8SMichael Guralnik 	read_lock(&pools->rb_lock);
337ce5df0b8SMichael Guralnik 	node = rb_find(key, &pools->rb_root, frmr_pool_cmp_find);
338ce5df0b8SMichael Guralnik 	pool = rb_entry_safe(node, struct ib_frmr_pool, node);
339ce5df0b8SMichael Guralnik 	read_unlock(&pools->rb_lock);
340ce5df0b8SMichael Guralnik 
341ce5df0b8SMichael Guralnik 	return pool;
342ce5df0b8SMichael Guralnik }
343ce5df0b8SMichael Guralnik 
/*
 * Allocate a new pool for @key and insert it into the device's rbtree.
 *
 * No handles are pre-created; they are added on demand by pop/push.
 * If another thread raced us and inserted a pool with an equal key
 * first, the freshly allocated pool is discarded and the winner is
 * returned, so callers always get a usable pool.
 *
 * Returns the pool (new or pre-existing) or ERR_PTR(-ENOMEM).
 */
static struct ib_frmr_pool *create_frmr_pool(struct ib_device *device,
					     struct ib_frmr_key *key)
{
	struct ib_frmr_pools *pools = device->frmr_pools;
	struct ib_frmr_pool *pool;
	struct rb_node *existing;

	pool = kzalloc_obj(*pool);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	memcpy(&pool->key, key, sizeof(*key));
	INIT_LIST_HEAD(&pool->queue.pages_list);
	INIT_LIST_HEAD(&pool->inactive_queue.pages_list);
	spin_lock_init(&pool->lock);
	INIT_DELAYED_WORK(&pool->aging_work, pool_aging_work);
	pool->device = device;

	write_lock(&pools->rb_lock);
	existing = rb_find_add(&pool->node, &pools->rb_root, frmr_pool_cmp_add);
	write_unlock(&pools->rb_lock);

	/* If a different thread has already created the pool, return it.
	 * The insert operation is done under the write lock so we are sure
	 * that the pool is not inserted twice.
	 */
	if (existing) {
		kfree(pool);
		return rb_entry(existing, struct ib_frmr_pool, node);
	}

	return pool;
}
377ce5df0b8SMichael Guralnik 
378020d189dSMichael Guralnik int ib_frmr_pools_set_pinned(struct ib_device *device, struct ib_frmr_key *key,
379020d189dSMichael Guralnik 			     u32 pinned_handles)
380020d189dSMichael Guralnik {
381020d189dSMichael Guralnik 	struct ib_frmr_pools *pools = device->frmr_pools;
382020d189dSMichael Guralnik 	struct ib_frmr_key driver_key = {};
383020d189dSMichael Guralnik 	struct ib_frmr_pool *pool;
384020d189dSMichael Guralnik 	u32 needed_handles;
385020d189dSMichael Guralnik 	u32 current_total;
386020d189dSMichael Guralnik 	int i, ret = 0;
387020d189dSMichael Guralnik 	u32 *handles;
388020d189dSMichael Guralnik 
389020d189dSMichael Guralnik 	if (!pools)
390020d189dSMichael Guralnik 		return -EINVAL;
391020d189dSMichael Guralnik 
392020d189dSMichael Guralnik 	ret = ib_check_mr_access(device, key->access_flags);
393020d189dSMichael Guralnik 	if (ret)
394020d189dSMichael Guralnik 		return ret;
395020d189dSMichael Guralnik 
396020d189dSMichael Guralnik 	if (pools->pool_ops->build_key) {
397020d189dSMichael Guralnik 		ret = pools->pool_ops->build_key(device, key, &driver_key);
398020d189dSMichael Guralnik 		if (ret)
399020d189dSMichael Guralnik 			return ret;
400020d189dSMichael Guralnik 	} else {
401020d189dSMichael Guralnik 		memcpy(&driver_key, key, sizeof(*key));
402020d189dSMichael Guralnik 	}
403020d189dSMichael Guralnik 
404020d189dSMichael Guralnik 	pool = ib_frmr_pool_find(pools, &driver_key);
405020d189dSMichael Guralnik 	if (!pool) {
406020d189dSMichael Guralnik 		pool = create_frmr_pool(device, &driver_key);
407020d189dSMichael Guralnik 		if (IS_ERR(pool))
408020d189dSMichael Guralnik 			return PTR_ERR(pool);
409020d189dSMichael Guralnik 	}
410020d189dSMichael Guralnik 
411020d189dSMichael Guralnik 	spin_lock(&pool->lock);
412020d189dSMichael Guralnik 	current_total = pool->in_use + pool->queue.ci + pool->inactive_queue.ci;
413020d189dSMichael Guralnik 
414020d189dSMichael Guralnik 	if (current_total < pinned_handles)
415020d189dSMichael Guralnik 		needed_handles = pinned_handles - current_total;
416020d189dSMichael Guralnik 	else
417020d189dSMichael Guralnik 		needed_handles = 0;
418020d189dSMichael Guralnik 
419020d189dSMichael Guralnik 	pool->pinned_handles = pinned_handles;
420020d189dSMichael Guralnik 	spin_unlock(&pool->lock);
421020d189dSMichael Guralnik 
422020d189dSMichael Guralnik 	if (!needed_handles)
423020d189dSMichael Guralnik 		goto schedule_aging;
424020d189dSMichael Guralnik 
425020d189dSMichael Guralnik 	handles = kcalloc(needed_handles, sizeof(*handles), GFP_KERNEL);
426020d189dSMichael Guralnik 	if (!handles)
427020d189dSMichael Guralnik 		return -ENOMEM;
428020d189dSMichael Guralnik 
429020d189dSMichael Guralnik 	ret = pools->pool_ops->create_frmrs(device, key, handles,
430020d189dSMichael Guralnik 					    needed_handles);
431020d189dSMichael Guralnik 	if (ret) {
432020d189dSMichael Guralnik 		kfree(handles);
433020d189dSMichael Guralnik 		return ret;
434020d189dSMichael Guralnik 	}
435020d189dSMichael Guralnik 
436020d189dSMichael Guralnik 	spin_lock(&pool->lock);
437020d189dSMichael Guralnik 	for (i = 0; i < needed_handles; i++) {
438020d189dSMichael Guralnik 		ret = push_handle_to_queue_locked(&pool->queue,
439020d189dSMichael Guralnik 						  handles[i]);
440020d189dSMichael Guralnik 		if (ret)
441020d189dSMichael Guralnik 			goto end;
442020d189dSMichael Guralnik 	}
443020d189dSMichael Guralnik 
444020d189dSMichael Guralnik end:
445020d189dSMichael Guralnik 	spin_unlock(&pool->lock);
446020d189dSMichael Guralnik 	kfree(handles);
447020d189dSMichael Guralnik 
448020d189dSMichael Guralnik schedule_aging:
449020d189dSMichael Guralnik 	/* Ensure aging is scheduled to adjust to new pinned handles count */
450020d189dSMichael Guralnik 	mod_delayed_work(pools->aging_wq, &pool->aging_work, 0);
451020d189dSMichael Guralnik 
452020d189dSMichael Guralnik 	return ret;
453020d189dSMichael Guralnik }
454020d189dSMichael Guralnik 
/*
 * Take one handle out of @pool for @mr.
 *
 * Preference order: active queue, then inactive queue (rescuing a handle
 * about to age out), and only as a last resort ask the driver to create
 * a fresh one. On success mr->frmr.{pool,handle} are set and the pool's
 * in_use / max_in_use accounting is updated.
 *
 * Returns 0 on success, negative error code from create_frmrs() on
 * failure (in which case in_use is not incremented).
 */
static int get_frmr_from_pool(struct ib_device *device,
			      struct ib_frmr_pool *pool, struct ib_mr *mr)
{
	struct ib_frmr_pools *pools = device->frmr_pools;
	u32 handle;
	int err;

	spin_lock(&pool->lock);
	if (pool->queue.ci == 0) {
		if (pool->inactive_queue.ci > 0) {
			handle = pop_handle_from_queue_locked(
				&pool->inactive_queue);
		} else {
			/*
			 * Drop the lock across the (possibly sleeping)
			 * driver call, then retake it for the accounting
			 * below. Queue state may change in between; the
			 * new handle itself is private to us.
			 */
			spin_unlock(&pool->lock);
			err = pools->pool_ops->create_frmrs(device, &pool->key,
							    &handle, 1);
			if (err)
				return err;
			spin_lock(&pool->lock);
		}
	} else {
		handle = pop_handle_from_queue_locked(&pool->queue);
	}

	pool->in_use++;
	if (pool->in_use > pool->max_in_use)
		pool->max_in_use = pool->in_use;

	spin_unlock(&pool->lock);

	mr->frmr.pool = pool;
	mr->frmr.handle = handle;

	return 0;
}
490ce5df0b8SMichael Guralnik 
491ce5df0b8SMichael Guralnik /*
492ce5df0b8SMichael Guralnik  * Pop an FRMR handle from the pool.
493ce5df0b8SMichael Guralnik  *
494ce5df0b8SMichael Guralnik  * @device: The device to pop the FRMR handle from.
495ce5df0b8SMichael Guralnik  * @mr: The MR to pop the FRMR handle from.
496ce5df0b8SMichael Guralnik  *
497ce5df0b8SMichael Guralnik  * Returns 0 on success, negative error code on failure.
498ce5df0b8SMichael Guralnik  */
499ce5df0b8SMichael Guralnik int ib_frmr_pool_pop(struct ib_device *device, struct ib_mr *mr)
500ce5df0b8SMichael Guralnik {
501ce5df0b8SMichael Guralnik 	struct ib_frmr_pools *pools = device->frmr_pools;
502ce5df0b8SMichael Guralnik 	struct ib_frmr_pool *pool;
503ce5df0b8SMichael Guralnik 
504ce5df0b8SMichael Guralnik 	WARN_ON_ONCE(!device->frmr_pools);
505ce5df0b8SMichael Guralnik 	pool = ib_frmr_pool_find(pools, &mr->frmr.key);
506ce5df0b8SMichael Guralnik 	if (!pool) {
507ce5df0b8SMichael Guralnik 		pool = create_frmr_pool(device, &mr->frmr.key);
508ce5df0b8SMichael Guralnik 		if (IS_ERR(pool))
509ce5df0b8SMichael Guralnik 			return PTR_ERR(pool);
510ce5df0b8SMichael Guralnik 	}
511ce5df0b8SMichael Guralnik 
512ce5df0b8SMichael Guralnik 	return get_frmr_from_pool(device, pool, mr);
513ce5df0b8SMichael Guralnik }
514ce5df0b8SMichael Guralnik EXPORT_SYMBOL(ib_frmr_pool_pop);
515ce5df0b8SMichael Guralnik 
/*
 * Push an FRMR handle back to the pool.
 *
 * @device: The device to push the FRMR handle to.
 * @mr: The MR containing the FRMR handle to push back to the pool.
 *
 * On success the handle returns to the active queue and in_use is
 * decremented; aging is (re)armed whenever an empty pool becomes
 * non-empty so the cached handle is eventually reclaimed.
 *
 * Returns 0 on success, negative error code on failure. On failure the
 * handle was NOT stored and in_use is left unchanged — the caller still
 * owns the handle.
 */
int ib_frmr_pool_push(struct ib_device *device, struct ib_mr *mr)
{
	struct ib_frmr_pool *pool = mr->frmr.pool;
	struct ib_frmr_pools *pools = device->frmr_pools;
	bool schedule_aging = false;
	int ret;

	spin_lock(&pool->lock);
	/* Schedule aging every time an empty pool becomes non-empty */
	if (pool->queue.ci == 0)
		schedule_aging = true;
	ret = push_handle_to_queue_locked(&pool->queue, mr->frmr.handle);
	if (ret == 0)
		pool->in_use--;

	spin_unlock(&pool->lock);

	/* queue (not mod): an already pending aging pass keeps its timer. */
	if (ret == 0 && schedule_aging)
		queue_delayed_work(pools->aging_wq, &pool->aging_work,
			secs_to_jiffies(READ_ONCE(pools->aging_period_sec)));

	return ret;
}
EXPORT_SYMBOL(ib_frmr_pool_push);
548