xref: /linux/drivers/infiniband/sw/siw/siw_mem.c (revision 06d07429858317ded2db7986113a9e0129cd599b)
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 
3 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
4 /* Copyright (c) 2008-2019, IBM Corporation */
5 
6 #include <linux/gfp.h>
7 #include <rdma/ib_verbs.h>
8 #include <rdma/ib_umem.h>
9 #include <linux/dma-mapping.h>
10 #include <linux/slab.h>
11 #include <linux/sched/mm.h>
12 #include <linux/resource.h>
13 
14 #include "siw.h"
15 #include "siw_mem.h"
16 
17 /* Stag lookup is based on its index part only (24 bits). */
18 #define SIW_STAG_MAX_INDEX	0x00ffffff
19 
20 /*
21  * The code avoids special Stag of zero and tries to randomize
22  * STag values between 1 and SIW_STAG_MAX_INDEX.
23  */
siw_mem_add(struct siw_device * sdev,struct siw_mem * m)24 int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
25 {
26 	struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
27 	u32 id, next;
28 
29 	get_random_bytes(&next, 4);
30 	next &= SIW_STAG_MAX_INDEX;
31 
32 	if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
33 	    GFP_KERNEL) < 0)
34 		return -ENOMEM;
35 
36 	/* Set the STag index part */
37 	m->stag = id << 8;
38 
39 	siw_dbg_mem(m, "new MEM object\n");
40 
41 	return 0;
42 }
43 
44 /*
45  * siw_mem_id2obj()
46  *
47  * resolves memory from stag given by id. might be called from:
48  * o process context before sending out of sgl, or
49  * o in softirq when resolving target memory
50  */
siw_mem_id2obj(struct siw_device * sdev,int stag_index)51 struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
52 {
53 	struct siw_mem *mem;
54 
55 	rcu_read_lock();
56 	mem = xa_load(&sdev->mem_xa, stag_index);
57 	if (likely(mem && kref_get_unless_zero(&mem->ref))) {
58 		rcu_read_unlock();
59 		return mem;
60 	}
61 	rcu_read_unlock();
62 
63 	return NULL;
64 }
65 
siw_umem_release(struct siw_umem * umem)66 void siw_umem_release(struct siw_umem *umem)
67 {
68 	int i, num_pages = umem->num_pages;
69 
70 	if (umem->base_mem)
71 		ib_umem_release(umem->base_mem);
72 
73 	for (i = 0; num_pages > 0; i++) {
74 		kfree(umem->page_chunk[i].plist);
75 		num_pages -= PAGES_PER_CHUNK;
76 	}
77 	kfree(umem->page_chunk);
78 	kfree(umem);
79 }
80 
/*
 * siw_mr_add_mem()
 *
 * Allocate a memory object for memory region @mr, initialize it from
 * the arguments and enter it into the device's STag table.
 *
 * @mr:		memory region to attach the new object to
 * @pd:		protection domain the memory belongs to
 * @mem_obj:	backing object, stored as is
 * @start:	start address, stored as mem->va
 * @len:	length, stored as mem->len
 * @rights:	access rights, masked with IWARP_ACCESS_MASK
 *
 * The object starts with one reference and with its STag marked
 * invalid. On success, mr->mem is set and both lkey and rkey of the
 * base MR are set to the new STag.
 *
 * Returns 0 on success, -ENOMEM if the object or a table entry
 * cannot be allocated.
 */
int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
		   u64 start, u64 len, int rights)
{
	struct xa_limit stag_range = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *mem;
	u32 stag_idx, start_hint;

	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem)
		return -ENOMEM;

	kref_init(&mem->ref);
	mem->sdev = sdev;
	mem->pd = pd;
	mem->mem_obj = mem_obj;
	mem->va = start;
	mem->len = len;
	mem->perms = rights & IWARP_ACCESS_MASK;
	mem->stag_valid = 0;

	/* Random starting point for the cyclic index allocation */
	get_random_bytes(&start_hint, sizeof(start_hint));
	start_hint &= SIW_STAG_MAX_INDEX;

	if (xa_alloc_cyclic(&sdev->mem_xa, &stag_idx, mem, stag_range,
			    &start_hint, GFP_KERNEL) < 0) {
		kfree(mem);
		return -ENOMEM;
	}

	/* Set the STag index part */
	mem->stag = stag_idx << 8;

	mr->mem = mem;
	mr->base_mr.rkey = mem->stag;
	mr->base_mr.lkey = mem->stag;

	return 0;
}
117 
siw_mr_drop_mem(struct siw_mr * mr)118 void siw_mr_drop_mem(struct siw_mr *mr)
119 {
120 	struct siw_mem *mem = mr->mem, *found;
121 
122 	mem->stag_valid = 0;
123 
124 	/* make STag invalid visible asap */
125 	smp_mb();
126 
127 	found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
128 	WARN_ON(found != mem);
129 	siw_mem_put(mem);
130 }
131 
siw_free_mem(struct kref * ref)132 void siw_free_mem(struct kref *ref)
133 {
134 	struct siw_mem *mem = container_of(ref, struct siw_mem, ref);
135 
136 	siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");
137 
138 	if (!mem->is_mw && mem->mem_obj) {
139 		if (mem->is_pbl == 0)
140 			siw_umem_release(mem->umem);
141 		else
142 			kfree(mem->pbl);
143 	}
144 	kfree(mem);
145 }
146 
147 /*
148  * siw_check_mem()
149  *
150  * Check protection domain, STAG state, access permissions and
151  * address range for memory object.
152  *
153  * @pd:		Protection Domain memory should belong to
154  * @mem:	memory to be checked
155  * @addr:	starting addr of mem
156  * @perms:	requested access permissions
157  * @len:	len of memory interval to be checked
158  *
159  */
siw_check_mem(struct ib_pd * pd,struct siw_mem * mem,u64 addr,enum ib_access_flags perms,int len)160 int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
161 		  enum ib_access_flags perms, int len)
162 {
163 	if (!mem->stag_valid) {
164 		siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
165 		return -E_STAG_INVALID;
166 	}
167 	if (mem->pd != pd) {
168 		siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
169 		return -E_PD_MISMATCH;
170 	}
171 	/*
172 	 * check access permissions
173 	 */
174 	if ((mem->perms & perms) < perms) {
175 		siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
176 			   mem->perms, perms);
177 		return -E_ACCESS_PERM;
178 	}
179 	/*
180 	 * Check if access falls into valid memory interval.
181 	 */
182 	if (addr < mem->va || addr + len > mem->va + mem->len) {
183 		siw_dbg_pd(pd, "MEM interval len %d\n", len);
184 		siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
185 			   (void *)(uintptr_t)addr,
186 			   (void *)(uintptr_t)(addr + len));
187 		siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
188 			   (void *)(uintptr_t)mem->va,
189 			   (void *)(uintptr_t)(mem->va + mem->len),
190 			   mem->stag);
191 
192 		return -E_BASE_BOUNDS;
193 	}
194 	return E_ACCESS_OK;
195 }
196 
197 /*
198  * siw_check_sge()
199  *
200  * Check SGE for access rights in given interval
201  *
202  * @pd:		Protection Domain memory should belong to
203  * @sge:	SGE to be checked
204  * @mem:	location of memory reference within array
205  * @perms:	requested access permissions
206  * @off:	starting offset in SGE
207  * @len:	len of memory interval to be checked
208  *
209  * NOTE: Function references SGE's memory object (mem->obj)
210  * if not yet done. New reference is kept if check went ok and
211  * released if check failed. If mem->obj is already valid, no new
212  * lookup is being done and mem is not released it check fails.
213  */
siw_check_sge(struct ib_pd * pd,struct siw_sge * sge,struct siw_mem * mem[],enum ib_access_flags perms,u32 off,int len)214 int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
215 		  enum ib_access_flags perms, u32 off, int len)
216 {
217 	struct siw_device *sdev = to_siw_dev(pd->device);
218 	struct siw_mem *new = NULL;
219 	int rv = E_ACCESS_OK;
220 
221 	if (len + off > sge->length) {
222 		rv = -E_BASE_BOUNDS;
223 		goto fail;
224 	}
225 	if (*mem == NULL) {
226 		new = siw_mem_id2obj(sdev, sge->lkey >> 8);
227 		if (unlikely(!new)) {
228 			siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
229 			rv = -E_STAG_INVALID;
230 			goto fail;
231 		}
232 		*mem = new;
233 	}
234 	/* Check if user re-registered with different STag key */
235 	if (unlikely((*mem)->stag != sge->lkey)) {
236 		siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
237 		rv = -E_STAG_INVALID;
238 		goto fail;
239 	}
240 	rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
241 	if (unlikely(rv))
242 		goto fail;
243 
244 	return 0;
245 
246 fail:
247 	if (new) {
248 		*mem = NULL;
249 		siw_mem_put(new);
250 	}
251 	return rv;
252 }
253 
siw_wqe_put_mem(struct siw_wqe * wqe,enum siw_opcode op)254 void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
255 {
256 	switch (op) {
257 	case SIW_OP_SEND:
258 	case SIW_OP_WRITE:
259 	case SIW_OP_SEND_WITH_IMM:
260 	case SIW_OP_SEND_REMOTE_INV:
261 	case SIW_OP_READ:
262 	case SIW_OP_READ_LOCAL_INV:
263 		if (!(wqe->sqe.flags & SIW_WQE_INLINE))
264 			siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
265 		break;
266 
267 	case SIW_OP_RECEIVE:
268 		siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
269 		break;
270 
271 	case SIW_OP_READ_RESPONSE:
272 		siw_unref_mem_sgl(wqe->mem, 1);
273 		break;
274 
275 	default:
276 		/*
277 		 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
278 		 * do not hold memory references
279 		 */
280 		break;
281 	}
282 }
283 
/*
 * siw_invalidate_stag()
 *
 * Mark the memory object registered under @stag invalid, provided it
 * belongs to protection domain @pd. The reference taken for the lookup
 * is dropped again before returning.
 *
 * Returns 0 on success, -EINVAL if no object is found for @stag,
 * -EACCES if the object belongs to a different protection domain.
 */
int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *mem;
	int rv;

	mem = siw_mem_id2obj(sdev, stag >> 8);
	if (unlikely(!mem)) {
		siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
		return -EINVAL;
	}
	if (likely(mem->pd == pd)) {
		/*
		 * Per RDMA verbs definition, an STag may already be in
		 * invalid state if invalidation is requested. So no state
		 * check here.
		 */
		mem->stag_valid = 0;
		siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
		rv = 0;
	} else {
		siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
		rv = -EACCES;
	}
	siw_mem_put(mem);

	return rv;
}
310 
311 /*
312  * Gets physical address backed by PBL element. Address is referenced
313  * by linear byte offset into list of variably sized PB elements.
314  * Optionally, provides remaining len within current element, and
315  * current PBL index for later resume at same element.
316  */
siw_pbl_get_buffer(struct siw_pbl * pbl,u64 off,int * len,int * idx)317 dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
318 {
319 	int i = idx ? *idx : 0;
320 
321 	while (i < pbl->num_buf) {
322 		struct siw_pble *pble = &pbl->pbe[i];
323 
324 		if (pble->pbl_off + pble->size > off) {
325 			u64 pble_off = off - pble->pbl_off;
326 
327 			if (len)
328 				*len = pble->size - pble_off;
329 			if (idx)
330 				*idx = i;
331 
332 			return pble->addr + pble_off;
333 		}
334 		i++;
335 	}
336 	if (len)
337 		*len = 0;
338 	return 0;
339 }
340 
/*
 * siw_pbl_alloc()
 *
 * Allocate a zeroed page buffer list with room for @num_buf elements
 * and record that capacity in max_buf.
 *
 * Returns the new list, ERR_PTR(-EINVAL) if @num_buf is zero, or
 * ERR_PTR(-ENOMEM) if the allocation fails.
 */
struct siw_pbl *siw_pbl_alloc(u32 num_buf)
{
	struct siw_pbl *pbl;

	if (!num_buf)
		return ERR_PTR(-EINVAL);

	pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL);
	if (pbl)
		pbl->max_buf = num_buf;
	else
		pbl = ERR_PTR(-ENOMEM);

	return pbl;
}
356 
/*
 * siw_umem_get()
 *
 * Get the user memory [@start, @start + @len) via ib_umem_get() and
 * record its pages in a siw_umem, organized as an array of chunks
 * holding up to PAGES_PER_CHUNK page pointers each.
 *
 * @base_dev:	ib device, passed on to ib_umem_get()
 * @start:	start address of the memory
 * @len:	length of the memory in bytes, must not be zero
 * @rights:	access rights, passed on to ib_umem_get()
 *
 * Returns the new siw_umem, or an ERR_PTR():
 * -EINVAL if @len is zero or if the scatterlist of the ib_umem holds
 *         fewer pages than the address range spans,
 * -ENOMEM if an allocation fails,
 * or the error reported by ib_umem_get().
 * On failure, everything acquired so far is released again.
 */
struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
			      u64 len, int rights)
{
	struct siw_umem *umem;
	struct ib_umem *base_mem;
	struct sg_page_iter sg_iter;
	struct sg_table *sgt;
	u64 first_page_va;
	int num_pages, num_chunks, i, rv = 0;

	if (!len)
		return ERR_PTR(-EINVAL);

	first_page_va = start & PAGE_MASK;
	num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
	num_chunks = (num_pages >> CHUNK_SHIFT) + 1;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	umem->page_chunk =
		kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
	if (!umem->page_chunk) {
		rv = -ENOMEM;
		goto err_out;
	}
	base_mem = ib_umem_get(base_dev, start, len, rights);
	if (IS_ERR(base_mem)) {
		rv = PTR_ERR(base_mem);
		siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv);
		goto err_out;
	}
	umem->fp_addr = first_page_va;
	umem->base_mem = base_mem;

	sgt = &base_mem->sgt_append.sgt;
	__sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);

	/* Position the iterator on the first page */
	if (!__sg_page_iter_next(&sg_iter)) {
		rv = -EINVAL;
		goto err_out;
	}
	for (i = 0; num_pages > 0; i++) {
		int nents = min_t(int, num_pages, PAGES_PER_CHUNK);
		struct page **plist =
			kcalloc(nents, sizeof(struct page *), GFP_KERNEL);

		if (!plist) {
			rv = -ENOMEM;
			goto err_out;
		}
		umem->page_chunk[i].plist = plist;
		while (nents--) {
			*plist = sg_page_iter_page(&sg_iter);
			umem->num_pages++;
			num_pages--;
			plist++;
			if (__sg_page_iter_next(&sg_iter))
				continue;
			/*
			 * Iterator exhausted. That is the regular end of
			 * the walk if all pages were collected. If pages
			 * are still missing, bail out instead of reading
			 * from the exhausted iterator again; every chunk
			 * list allocated so far is accounted for in
			 * umem->num_pages and gets freed on release.
			 */
			if (num_pages > 0) {
				rv = -EINVAL;
				goto err_out;
			}
			break;
		}
	}
	return umem;
err_out:
	siw_umem_release(umem);

	return ERR_PTR(rv);
}
425