// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include <linux/gfp.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/resource.h>

#include "siw.h"
#include "siw_mem.h"

/* STag lookup is based on its index part only (24 bits). */
#define SIW_STAG_MAX_INDEX	0x00ffffff

/*
 * siw_mem_id2obj()
 *
 * Resolves the memory object from the STag index. Might be called from:
 *  o process context, before sending out of an SGL, or
 *  o softirq context, when resolving target memory.
 */
struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
{
        struct siw_mem *mem;

        rcu_read_lock();
        mem = xa_load(&sdev->mem_xa, stag_index);
        if (likely(mem && kref_get_unless_zero(&mem->ref))) {
                rcu_read_unlock();
                return mem;
        }
        rcu_read_unlock();

        return NULL;
}

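/*
 * siw_umem_release()
 *
 * Releases the ib_umem pinning (if any) and frees the per-chunk
 * page lists together with the siw_umem container itself.
 */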
void siw_umem_release(struct siw_umem *umem)
{
        int i, num_pages = umem->num_pages;

        if (umem->base_mem)
                ib_umem_release(umem->base_mem);

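        /* Free one page list per chunk; each chunk covers up to PAGES_PER_CHUNK pages. */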
        for (i = 0; num_pages > 0; i++) {
                kfree(umem->page_chunk[i].plist);
                num_pages -= PAGES_PER_CHUNK;
        }
        kfree(umem->page_chunk);
        kfree(umem);
}

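/*
 * siw_mr_add_mem()
 *
 * Allocates a siw_mem object for the given memory region and inserts
 * it into the device's memory ID table. The STag index is assigned
 * cyclically, starting at a random position, and the 8-bit STag key
 * part is left zero. The new STag starts out in the invalid state.
 */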
int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
                   u64 start, u64 len, int rights)
{
        struct siw_device *sdev = to_siw_dev(pd->device);
        struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
        struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
        u32 id, next;

        if (!mem)
                return -ENOMEM;

        mem->mem_obj = mem_obj;
        mem->stag_valid = 0;
        mem->sdev = sdev;
        mem->va = start;
        mem->len = len;
        mem->pd = pd;
        mem->perms = rights & IWARP_ACCESS_MASK;
        kref_init(&mem->ref);

        get_random_bytes(&next, 4);
        next &= SIW_STAG_MAX_INDEX;

        if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
                            GFP_KERNEL) < 0) {
                kfree(mem);
                return -ENOMEM;
        }

        mr->mem = mem;
        /* Set the STag index part */
        mem->stag = id << 8;
        mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;

        return 0;
}

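/*
 * siw_mr_drop_mem()
 *
 * Invalidates the MR's STag, removes the memory object from the
 * device's memory ID table and drops its reference.
 */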
void siw_mr_drop_mem(struct siw_mr *mr)
{
        struct siw_mem *mem = mr->mem, *found;

        mem->stag_valid = 0;

        /* make STag invalid visible asap */
        smp_mb();

        found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
        WARN_ON(found != mem);
        siw_mem_put(mem);
}

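/*
 * siw_free_mem()
 *
 * Final kref release function: frees the memory object and, unless it
 * describes a memory window, its attached umem or physical buffer list.
 */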
void siw_free_mem(struct kref *ref)
{
        struct siw_mem *mem = container_of(ref, struct siw_mem, ref);

        siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");

        if (!mem->is_mw && mem->mem_obj) {
                if (mem->is_pbl == 0)
                        siw_umem_release(mem->umem);
                else
                        kfree(mem->pbl);
        }
        kfree(mem);
}

/*
 * siw_check_mem()
 *
 * Check protection domain, STag state, access permissions and
 * address range for memory object.
 *
 * @pd:		Protection Domain memory should belong to
 * @mem:	memory to be checked
 * @addr:	starting addr of mem
 * @perms:	requested access permissions
 * @len:	len of memory interval to be checked
 *
 */
int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
                  enum ib_access_flags perms, int len)
{
        if (!mem->stag_valid) {
                siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
                return -E_STAG_INVALID;
        }
        if (mem->pd != pd) {
                siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
                return -E_PD_MISMATCH;
        }
        /*
         * check access permissions
         */
        if ((mem->perms & perms) < perms) {
                siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
                           mem->perms, perms);
                return -E_ACCESS_PERM;
        }
        /*
         * Check if access falls into valid memory interval.
         */
        if (addr < mem->va || addr + len > mem->va + mem->len) {
                siw_dbg_pd(pd, "MEM interval len %d\n", len);
                siw_dbg_pd(pd, "[0x%p, 0x%p] out of bounds\n",
                           (void *)(uintptr_t)addr,
                           (void *)(uintptr_t)(addr + len));
                siw_dbg_pd(pd, "[0x%p, 0x%p] STag=0x%08x\n",
                           (void *)(uintptr_t)mem->va,
                           (void *)(uintptr_t)(mem->va + mem->len),
                           mem->stag);

                return -E_BASE_BOUNDS;
        }
        return E_ACCESS_OK;
}

/*
 * siw_check_sge()
 *
 * Check SGE for access rights within the given interval.
 *
 * @pd:		Protection Domain memory should belong to
 * @sge:	SGE to be checked
 * @mem:	location of memory reference within array
 * @perms:	requested access permissions
 * @off:	starting offset in SGE
 * @len:	len of memory interval to be checked
 *
 * NOTE: Function references the SGE's memory object (*mem)
 * if not yet done. The new reference is kept if the check went ok and
 * released if the check failed. If *mem is already valid, no new
 * lookup is done and the reference is not released if the check fails.
 */
int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
                  enum ib_access_flags perms, u32 off, int len)
{
        struct siw_device *sdev = to_siw_dev(pd->device);
        struct siw_mem *new = NULL;
        int rv = E_ACCESS_OK;

        if (len + off > sge->length) {
                rv = -E_BASE_BOUNDS;
                goto fail;
        }
        if (*mem == NULL) {
                new = siw_mem_id2obj(sdev, sge->lkey >> 8);
                if (unlikely(!new)) {
                        siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
                        rv = -E_STAG_INVALID;
                        goto fail;
                }
                *mem = new;
        }
        /* Check if user re-registered with different STag key */
        if (unlikely((*mem)->stag != sge->lkey)) {
                siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
                rv = -E_STAG_INVALID;
                goto fail;
        }
        rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
        if (unlikely(rv))
                goto fail;

        return 0;

fail:
        if (new) {
                *mem = NULL;
                siw_mem_put(new);
        }
        return rv;
}

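/*
 * siw_wqe_put_mem()
 *
 * Drops the memory references a WQE may hold on its SGL, depending
 * on the work request type. Inline data and operations which do not
 * reference memory are left untouched.
 */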
void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
{
        switch (op) {
        case SIW_OP_SEND:
        case SIW_OP_WRITE:
        case SIW_OP_SEND_WITH_IMM:
        case SIW_OP_SEND_REMOTE_INV:
        case SIW_OP_READ:
        case SIW_OP_READ_LOCAL_INV:
                if (!(wqe->sqe.flags & SIW_WQE_INLINE))
                        siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
                break;

        case SIW_OP_RECEIVE:
                siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
                break;

        case SIW_OP_READ_RESPONSE:
                siw_unref_mem_sgl(wqe->mem, 1);
                break;

        default:
                /*
                 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
                 * do not hold memory references
                 */
                break;
        }
}

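/*
 * siw_invalidate_stag()
 *
 * Sets the STag referenced by @stag to the invalid state, after
 * checking that it exists and belongs to the given protection domain.
 */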
int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
{
        struct siw_device *sdev = to_siw_dev(pd->device);
        struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
        int rv = 0;

        if (unlikely(!mem)) {
                siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
                return -EINVAL;
        }
        if (unlikely(mem->pd != pd)) {
                siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
                rv = -EACCES;
                goto out;
        }
        /*
         * Per RDMA verbs definition, an STag may already be in invalid
         * state if invalidation is requested. So no state check here.
         */
        mem->stag_valid = 0;

        siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
out:
        siw_mem_put(mem);
        return rv;
}

/*
 * Gets physical address backed by PBL element. Address is referenced
 * by linear byte offset into list of variably sized PB elements.
 * Optionally, provides remaining len within current element, and
 * current PBL index for later resume at same element.
 */
dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
{
        int i = idx ? *idx : 0;

        while (i < pbl->num_buf) {
                struct siw_pble *pble = &pbl->pbe[i];

                if (pble->pbl_off + pble->size > off) {
                        u64 pble_off = off - pble->pbl_off;

                        if (len)
                                *len = pble->size - pble_off;
                        if (idx)
                                *idx = i;

                        return pble->addr + pble_off;
                }
                i++;
        }
        if (len)
                *len = 0;
        return 0;
}

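/*
 * siw_pbl_alloc()
 *
 * Allocates a physical buffer list capable of holding @num_buf
 * page buffer elements.
 */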
struct siw_pbl *siw_pbl_alloc(u32 num_buf)
{
        struct siw_pbl *pbl;

        if (num_buf == 0)
                return ERR_PTR(-EINVAL);

        pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL);
        if (!pbl)
                return ERR_PTR(-ENOMEM);

        pbl->max_buf = num_buf;

        return pbl;
}

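/*
 * siw_umem_get()
 *
 * Pins the user memory range [start, start + len) via ib_umem_get()
 * with the given access rights, and records the backing pages in
 * chunked page lists of the returned siw_umem.
 */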
struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
                              u64 len, int rights)
{
        struct siw_umem *umem;
        struct ib_umem *base_mem;
        struct sg_page_iter sg_iter;
        struct sg_table *sgt;
        u64 first_page_va;
        int num_pages, num_chunks, i, rv = 0;

        if (!len)
                return ERR_PTR(-EINVAL);

        first_page_va = start & PAGE_MASK;
        num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
        num_chunks = (num_pages >> CHUNK_SHIFT) + 1;

        umem = kzalloc(sizeof(*umem), GFP_KERNEL);
        if (!umem)
                return ERR_PTR(-ENOMEM);

        umem->page_chunk =
                kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
        if (!umem->page_chunk) {
                rv = -ENOMEM;
                goto err_out;
        }
        base_mem = ib_umem_get(base_dev, start, len, rights);
        if (IS_ERR(base_mem)) {
                rv = PTR_ERR(base_mem);
                siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv);
                goto err_out;
        }
        umem->fp_addr = first_page_va;
        umem->base_mem = base_mem;

        sgt = &base_mem->sgt_append.sgt;
        __sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);

        if (!__sg_page_iter_next(&sg_iter)) {
                rv = -EINVAL;
                goto err_out;
        }
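        /* Populate the per-chunk page lists from the umem scatterlist. */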
        for (i = 0; num_pages > 0; i++) {
                int nents = min_t(int, num_pages, PAGES_PER_CHUNK);
                struct page **plist =
                        kcalloc(nents, sizeof(struct page *), GFP_KERNEL);

                if (!plist) {
                        rv = -ENOMEM;
                        goto err_out;
                }
                umem->page_chunk[i].plist = plist;
                while (nents--) {
                        *plist = sg_page_iter_page(&sg_iter);
                        umem->num_pages++;
                        num_pages--;
                        plist++;
                        if (!__sg_page_iter_next(&sg_iter))
                                break;
                }
        }
        return umem;
err_out:
        siw_umem_release(umem);

        return ERR_PTR(rv);
}