xref: /linux/drivers/infiniband/sw/siw/siw_mem.c (revision af841056860d2dc2754a122fb79abbe92f3752f3)
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 
3 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
4 /* Copyright (c) 2008-2019, IBM Corporation */
5 
6 #include <linux/gfp.h>
7 #include <rdma/ib_verbs.h>
8 #include <rdma/ib_umem.h>
9 #include <linux/dma-mapping.h>
10 #include <linux/slab.h>
11 #include <linux/sched/mm.h>
12 #include <linux/resource.h>
13 
14 #include "siw.h"
15 #include "siw_mem.h"
16 
/*
 * Stag lookup is based on its index part only (24 bits).
 * SIW_STAG_MAX_INDEX is the largest value that fits into those 24 bits;
 * it is used in this file as upper limit when allocating an index and
 * as mask for the random start value of the index search.
 */
#define SIW_STAG_MAX_INDEX	0x00ffffff
19 
20 /*
21  * siw_mem_id2obj()
22  *
23  * resolves memory from stag given by id. might be called from:
24  * o process context before sending out of sgl, or
25  * o in softirq when resolving target memory
26  */
27 struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
28 {
29 	struct siw_mem *mem;
30 
31 	rcu_read_lock();
32 	mem = xa_load(&sdev->mem_xa, stag_index);
33 	if (likely(mem && kref_get_unless_zero(&mem->ref))) {
34 		rcu_read_unlock();
35 		return mem;
36 	}
37 	rcu_read_unlock();
38 
39 	return NULL;
40 }
41 
42 void siw_umem_release(struct siw_umem *umem)
43 {
44 	unsigned int i, num_chunks = umem->num_chunks;
45 
46 	if (umem->base_mem)
47 		ib_umem_release(umem->base_mem);
48 
49 	for (i = 0; i < num_chunks; i++)
50 		kfree(umem->page_chunk[i].plist);
51 
52 	kfree(umem);
53 }
54 
55 int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
56 		   u64 start, u64 len, int rights)
57 {
58 	struct siw_device *sdev = to_siw_dev(pd->device);
59 	struct siw_mem *mem = kzalloc_obj(*mem);
60 	struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
61 	u32 id, next;
62 
63 	if (!mem)
64 		return -ENOMEM;
65 
66 	mem->mem_obj = mem_obj;
67 	mem->stag_valid = 0;
68 	mem->sdev = sdev;
69 	mem->va = start;
70 	mem->len = len;
71 	mem->pd = pd;
72 	mem->perms = rights & IWARP_ACCESS_MASK;
73 	kref_init(&mem->ref);
74 
75 	get_random_bytes(&next, 4);
76 	next &= SIW_STAG_MAX_INDEX;
77 
78 	if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
79 	    GFP_KERNEL) < 0) {
80 		kfree(mem);
81 		return -ENOMEM;
82 	}
83 
84 	mr->mem = mem;
85 	/* Set the STag index part */
86 	mem->stag = id << 8;
87 	mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;
88 
89 	return 0;
90 }
91 
92 void siw_mr_drop_mem(struct siw_mr *mr)
93 {
94 	struct siw_mem *mem = mr->mem, *found;
95 
96 	mem->stag_valid = 0;
97 
98 	/* make STag invalid visible asap */
99 	smp_mb();
100 
101 	found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
102 	WARN_ON(found != mem);
103 	siw_mem_put(mem);
104 }
105 
106 void siw_free_mem(struct kref *ref)
107 {
108 	struct siw_mem *mem = container_of(ref, struct siw_mem, ref);
109 
110 	siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");
111 
112 	if (!mem->is_mw && mem->mem_obj) {
113 		if (mem->is_pbl == 0)
114 			siw_umem_release(mem->umem);
115 		else
116 			kfree(mem->pbl);
117 	}
118 	kfree(mem);
119 }
120 
121 /*
122  * siw_check_mem()
123  *
124  * Check protection domain, STAG state, access permissions and
125  * address range for memory object.
126  *
127  * @pd:		Protection Domain memory should belong to
128  * @mem:	memory to be checked
129  * @addr:	starting addr of mem
130  * @perms:	requested access permissions
131  * @len:	len of memory interval to be checked
132  *
133  */
134 int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
135 		  enum ib_access_flags perms, int len)
136 {
137 	if (!mem->stag_valid) {
138 		siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
139 		return -E_STAG_INVALID;
140 	}
141 	if (mem->pd != pd) {
142 		siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
143 		return -E_PD_MISMATCH;
144 	}
145 	/*
146 	 * check access permissions
147 	 */
148 	if ((mem->perms & perms) < perms) {
149 		siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
150 			   mem->perms, perms);
151 		return -E_ACCESS_PERM;
152 	}
153 	/*
154 	 * Check if access falls into valid memory interval.
155 	 */
156 	if (addr < mem->va || addr + len > mem->va + mem->len) {
157 		siw_dbg_pd(pd, "MEM interval len %d\n", len);
158 		siw_dbg_pd(pd, "[0x%p, 0x%p] out of bounds\n",
159 			   (void *)(uintptr_t)addr,
160 			   (void *)(uintptr_t)(addr + len));
161 		siw_dbg_pd(pd, "[0x%p, 0x%p] STag=0x%08x\n",
162 			   (void *)(uintptr_t)mem->va,
163 			   (void *)(uintptr_t)(mem->va + mem->len),
164 			   mem->stag);
165 
166 		return -E_BASE_BOUNDS;
167 	}
168 	return E_ACCESS_OK;
169 }
170 
171 /*
172  * siw_check_sge()
173  *
174  * Check SGE for access rights in given interval
175  *
176  * @pd:		Protection Domain memory should belong to
177  * @sge:	SGE to be checked
178  * @mem:	location of memory reference within array
179  * @perms:	requested access permissions
180  * @off:	starting offset in SGE
181  * @len:	len of memory interval to be checked
182  *
183  * NOTE: Function references SGE's memory object (mem->obj)
184  * if not yet done. New reference is kept if check went ok and
185  * released if check failed. If mem->obj is already valid, no new
186  * lookup is being done and mem is not released it check fails.
187  */
188 int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
189 		  enum ib_access_flags perms, u32 off, u32 len)
190 {
191 	struct siw_device *sdev = to_siw_dev(pd->device);
192 	struct siw_mem *new = NULL;
193 	int rv = E_ACCESS_OK;
194 
195 	if (len + off > sge->length) {
196 		rv = -E_BASE_BOUNDS;
197 		goto fail;
198 	}
199 	if (*mem == NULL) {
200 		new = siw_mem_id2obj(sdev, sge->lkey >> 8);
201 		if (unlikely(!new)) {
202 			siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
203 			rv = -E_STAG_INVALID;
204 			goto fail;
205 		}
206 		*mem = new;
207 	}
208 	/* Check if user re-registered with different STag key */
209 	if (unlikely((*mem)->stag != sge->lkey)) {
210 		siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
211 		rv = -E_STAG_INVALID;
212 		goto fail;
213 	}
214 	rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
215 	if (unlikely(rv))
216 		goto fail;
217 
218 	return 0;
219 
220 fail:
221 	if (new) {
222 		*mem = NULL;
223 		siw_mem_put(new);
224 	}
225 	return rv;
226 }
227 
228 void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
229 {
230 	switch (op) {
231 	case SIW_OP_SEND:
232 	case SIW_OP_WRITE:
233 	case SIW_OP_SEND_WITH_IMM:
234 	case SIW_OP_SEND_REMOTE_INV:
235 	case SIW_OP_READ:
236 	case SIW_OP_READ_LOCAL_INV:
237 		if (!(wqe->sqe.flags & SIW_WQE_INLINE))
238 			siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
239 		break;
240 
241 	case SIW_OP_RECEIVE:
242 		siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
243 		break;
244 
245 	case SIW_OP_READ_RESPONSE:
246 		siw_unref_mem_sgl(wqe->mem, 1);
247 		break;
248 
249 	default:
250 		/*
251 		 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
252 		 * do not hold memory references
253 		 */
254 		break;
255 	}
256 }
257 
258 int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
259 {
260 	struct siw_device *sdev = to_siw_dev(pd->device);
261 	struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
262 	int rv = 0;
263 
264 	if (unlikely(!mem)) {
265 		siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
266 		return -EINVAL;
267 	}
268 	if (unlikely(mem->pd != pd)) {
269 		siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
270 		rv = -EACCES;
271 		goto out;
272 	}
273 	/*
274 	 * Per RDMA verbs definition, an STag may already be in invalid
275 	 * state if invalidation is requested. So no state check here.
276 	 */
277 	mem->stag_valid = 0;
278 
279 	siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
280 out:
281 	siw_mem_put(mem);
282 	return rv;
283 }
284 
285 /*
286  * Gets physical address backed by PBL element. Address is referenced
287  * by linear byte offset into list of variably sized PB elements.
288  * Optionally, provides remaining len within current element, and
289  * current PBL index for later resume at same element.
290  */
291 dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
292 {
293 	int i = idx ? *idx : 0;
294 
295 	while (i < pbl->num_buf) {
296 		struct siw_pble *pble = &pbl->pbe[i];
297 
298 		if (pble->pbl_off + pble->size > off) {
299 			u64 pble_off = off - pble->pbl_off;
300 
301 			if (len)
302 				*len = pble->size - pble_off;
303 			if (idx)
304 				*idx = i;
305 
306 			return pble->addr + pble_off;
307 		}
308 		i++;
309 	}
310 	if (len)
311 		*len = 0;
312 	return 0;
313 }
314 
315 struct siw_pbl *siw_pbl_alloc(u32 num_buf)
316 {
317 	struct siw_pbl *pbl;
318 
319 	if (num_buf == 0)
320 		return ERR_PTR(-EINVAL);
321 
322 	pbl = kzalloc_flex(*pbl, pbe, num_buf);
323 	if (!pbl)
324 		return ERR_PTR(-ENOMEM);
325 
326 	pbl->max_buf = num_buf;
327 
328 	return pbl;
329 }
330 
331 struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
332 			      u64 len, int rights)
333 {
334 	struct siw_umem *umem;
335 	struct ib_umem *base_mem;
336 	struct sg_page_iter sg_iter;
337 	struct sg_table *sgt;
338 	u64 first_page_va;
339 	unsigned int num_pages, num_chunks, i;
340 	int rv = 0;
341 
342 	if (!len)
343 		return ERR_PTR(-EINVAL);
344 
345 	first_page_va = start & PAGE_MASK;
346 	num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
347 	num_chunks = ((num_pages - 1) >> CHUNK_SHIFT) + 1;
348 
349 	umem = kzalloc_flex(*umem, page_chunk, num_chunks);
350 	if (!umem)
351 		return ERR_PTR(-ENOMEM);
352 
353 	base_mem = ib_umem_get_va(base_dev, start, len, rights);
354 	if (IS_ERR(base_mem)) {
355 		rv = PTR_ERR(base_mem);
356 		siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv);
357 		goto err_out;
358 	}
359 	umem->fp_addr = first_page_va;
360 	umem->base_mem = base_mem;
361 	umem->num_pages = num_pages;
362 	umem->num_chunks = num_chunks;
363 
364 	sgt = &base_mem->sgt_append.sgt;
365 	__sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);
366 
367 	for (i = 0; i < num_chunks; i++) {
368 		struct page **plist;
369 		unsigned int pix, nents = min(num_pages, PAGES_PER_CHUNK);
370 
371 		plist = kzalloc_objs(struct page *, nents);
372 		if (!plist) {
373 			rv = -ENOMEM;
374 			goto err_out;
375 		}
376 		umem->page_chunk[i].plist = plist;
377 
378 		for (pix = 0; pix < nents; pix++) {
379 			if (!__sg_page_iter_next(&sg_iter))
380 				break;
381 			plist[pix] = sg_page_iter_page(&sg_iter);
382 			num_pages--;
383 		}
384 	}
385 
386 	if (num_pages) {
387 		/*
388 		 * Unexpected size of sg list provided by ib_umem_get_va()
389 		 */
390 		siw_dbg(base_dev, "Short SG list, missing %u pages\n",
391 			num_pages);
392 		rv = -EINVAL;
393 		goto err_out;
394 	}
395 	return umem;
396 err_out:
397 	siw_umem_release(umem);
398 
399 	return ERR_PTR(rv);
400 }
401