1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 3 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ 4 /* Copyright (c) 2008-2019, IBM Corporation */ 5 6 #include <linux/gfp.h> 7 #include <rdma/ib_verbs.h> 8 #include <rdma/ib_umem.h> 9 #include <linux/dma-mapping.h> 10 #include <linux/slab.h> 11 #include <linux/sched/mm.h> 12 #include <linux/resource.h> 13 14 #include "siw.h" 15 #include "siw_mem.h" 16 17 /* Stag lookup is based on its index part only (24 bits). */ 18 #define SIW_STAG_MAX_INDEX 0x00ffffff 19 20 /* 21 * siw_mem_id2obj() 22 * 23 * resolves memory from stag given by id. might be called from: 24 * o process context before sending out of sgl, or 25 * o in softirq when resolving target memory 26 */ 27 struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index) 28 { 29 struct siw_mem *mem; 30 31 rcu_read_lock(); 32 mem = xa_load(&sdev->mem_xa, stag_index); 33 if (likely(mem && kref_get_unless_zero(&mem->ref))) { 34 rcu_read_unlock(); 35 return mem; 36 } 37 rcu_read_unlock(); 38 39 return NULL; 40 } 41 42 void siw_umem_release(struct siw_umem *umem) 43 { 44 unsigned int i, num_chunks = umem->num_chunks; 45 46 if (umem->base_mem) 47 ib_umem_release(umem->base_mem); 48 49 for (i = 0; i < num_chunks; i++) 50 kfree(umem->page_chunk[i].plist); 51 52 kfree(umem); 53 } 54 55 int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj, 56 u64 start, u64 len, int rights) 57 { 58 struct siw_device *sdev = to_siw_dev(pd->device); 59 struct siw_mem *mem = kzalloc_obj(*mem); 60 struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX); 61 u32 id, next; 62 63 if (!mem) 64 return -ENOMEM; 65 66 mem->mem_obj = mem_obj; 67 mem->stag_valid = 0; 68 mem->sdev = sdev; 69 mem->va = start; 70 mem->len = len; 71 mem->pd = pd; 72 mem->perms = rights & IWARP_ACCESS_MASK; 73 kref_init(&mem->ref); 74 75 get_random_bytes(&next, 4); 76 next &= SIW_STAG_MAX_INDEX; 77 78 if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next, 79 GFP_KERNEL) < 0) { 80 kfree(mem); 81 return -ENOMEM; 82 } 83 84 mr->mem = mem; 85 /* Set the STag index part */ 86 mem->stag = id << 8; 87 mr->base_mr.lkey = mr->base_mr.rkey = mem->stag; 88 89 return 0; 90 } 91 92 void siw_mr_drop_mem(struct siw_mr *mr) 93 { 94 struct siw_mem *mem = mr->mem, *found; 95 96 mem->stag_valid = 0; 97 98 /* make STag invalid visible asap */ 99 smp_mb(); 100 101 found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8); 102 WARN_ON(found != mem); 103 siw_mem_put(mem); 104 } 105 106 void siw_free_mem(struct kref *ref) 107 { 108 struct siw_mem *mem = container_of(ref, struct siw_mem, ref); 109 110 siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n"); 111 112 if (!mem->is_mw && mem->mem_obj) { 113 if (mem->is_pbl == 0) 114 siw_umem_release(mem->umem); 115 else 116 kfree(mem->pbl); 117 } 118 kfree(mem); 119 } 120 121 /* 122 * siw_check_mem() 123 * 124 * Check protection domain, STAG state, access permissions and 125 * address range for memory object. 126 * 127 * @pd: Protection Domain memory should belong to 128 * @mem: memory to be checked 129 * @addr: starting addr of mem 130 * @perms: requested access permissions 131 * @len: len of memory interval to be checked 132 * 133 */ 134 int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr, 135 enum ib_access_flags perms, int len) 136 { 137 if (!mem->stag_valid) { 138 siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag); 139 return -E_STAG_INVALID; 140 } 141 if (mem->pd != pd) { 142 siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag); 143 return -E_PD_MISMATCH; 144 } 145 /* 146 * check access permissions 147 */ 148 if ((mem->perms & perms) < perms) { 149 siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n", 150 mem->perms, perms); 151 return -E_ACCESS_PERM; 152 } 153 /* 154 * Check if access falls into valid memory interval. 155 */ 156 if (addr < mem->va || addr + len > mem->va + mem->len) { 157 siw_dbg_pd(pd, "MEM interval len %d\n", len); 158 siw_dbg_pd(pd, "[0x%p, 0x%p] out of bounds\n", 159 (void *)(uintptr_t)addr, 160 (void *)(uintptr_t)(addr + len)); 161 siw_dbg_pd(pd, "[0x%p, 0x%p] STag=0x%08x\n", 162 (void *)(uintptr_t)mem->va, 163 (void *)(uintptr_t)(mem->va + mem->len), 164 mem->stag); 165 166 return -E_BASE_BOUNDS; 167 } 168 return E_ACCESS_OK; 169 } 170 171 /* 172 * siw_check_sge() 173 * 174 * Check SGE for access rights in given interval 175 * 176 * @pd: Protection Domain memory should belong to 177 * @sge: SGE to be checked 178 * @mem: location of memory reference within array 179 * @perms: requested access permissions 180 * @off: starting offset in SGE 181 * @len: len of memory interval to be checked 182 * 183 * NOTE: Function references SGE's memory object (mem->obj) 184 * if not yet done. New reference is kept if check went ok and 185 * released if check failed. If mem->obj is already valid, no new 186 * lookup is being done and mem is not released it check fails. 187 */ 188 int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[], 189 enum ib_access_flags perms, u32 off, u32 len) 190 { 191 struct siw_device *sdev = to_siw_dev(pd->device); 192 struct siw_mem *new = NULL; 193 int rv = E_ACCESS_OK; 194 195 if (len + off > sge->length) { 196 rv = -E_BASE_BOUNDS; 197 goto fail; 198 } 199 if (*mem == NULL) { 200 new = siw_mem_id2obj(sdev, sge->lkey >> 8); 201 if (unlikely(!new)) { 202 siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey); 203 rv = -E_STAG_INVALID; 204 goto fail; 205 } 206 *mem = new; 207 } 208 /* Check if user re-registered with different STag key */ 209 if (unlikely((*mem)->stag != sge->lkey)) { 210 siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey); 211 rv = -E_STAG_INVALID; 212 goto fail; 213 } 214 rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len); 215 if (unlikely(rv)) 216 goto fail; 217 218 return 0; 219 220 fail: 221 if (new) { 222 *mem = NULL; 223 siw_mem_put(new); 224 } 225 return rv; 226 } 227 228 void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op) 229 { 230 switch (op) { 231 case SIW_OP_SEND: 232 case SIW_OP_WRITE: 233 case SIW_OP_SEND_WITH_IMM: 234 case SIW_OP_SEND_REMOTE_INV: 235 case SIW_OP_READ: 236 case SIW_OP_READ_LOCAL_INV: 237 if (!(wqe->sqe.flags & SIW_WQE_INLINE)) 238 siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge); 239 break; 240 241 case SIW_OP_RECEIVE: 242 siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge); 243 break; 244 245 case SIW_OP_READ_RESPONSE: 246 siw_unref_mem_sgl(wqe->mem, 1); 247 break; 248 249 default: 250 /* 251 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR 252 * do not hold memory references 253 */ 254 break; 255 } 256 } 257 258 int siw_invalidate_stag(struct ib_pd *pd, u32 stag) 259 { 260 struct siw_device *sdev = to_siw_dev(pd->device); 261 struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8); 262 int rv = 0; 263 264 if (unlikely(!mem)) { 265 siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag); 266 return -EINVAL; 267 } 268 if (unlikely(mem->pd != pd)) { 269 siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag); 270 rv = -EACCES; 271 goto out; 272 } 273 /* 274 * Per RDMA verbs definition, an STag may already be in invalid 275 * state if invalidation is requested. So no state check here. 276 */ 277 mem->stag_valid = 0; 278 279 siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag); 280 out: 281 siw_mem_put(mem); 282 return rv; 283 } 284 285 /* 286 * Gets physical address backed by PBL element. Address is referenced 287 * by linear byte offset into list of variably sized PB elements. 288 * Optionally, provides remaining len within current element, and 289 * current PBL index for later resume at same element. 290 */ 291 dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx) 292 { 293 int i = idx ? *idx : 0; 294 295 while (i < pbl->num_buf) { 296 struct siw_pble *pble = &pbl->pbe[i]; 297 298 if (pble->pbl_off + pble->size > off) { 299 u64 pble_off = off - pble->pbl_off; 300 301 if (len) 302 *len = pble->size - pble_off; 303 if (idx) 304 *idx = i; 305 306 return pble->addr + pble_off; 307 } 308 i++; 309 } 310 if (len) 311 *len = 0; 312 return 0; 313 } 314 315 struct siw_pbl *siw_pbl_alloc(u32 num_buf) 316 { 317 struct siw_pbl *pbl; 318 319 if (num_buf == 0) 320 return ERR_PTR(-EINVAL); 321 322 pbl = kzalloc_flex(*pbl, pbe, num_buf); 323 if (!pbl) 324 return ERR_PTR(-ENOMEM); 325 326 pbl->max_buf = num_buf; 327 328 return pbl; 329 } 330 331 struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start, 332 u64 len, int rights) 333 { 334 struct siw_umem *umem; 335 struct ib_umem *base_mem; 336 struct sg_page_iter sg_iter; 337 struct sg_table *sgt; 338 u64 first_page_va; 339 unsigned int num_pages, num_chunks, i; 340 int rv = 0; 341 342 if (!len) 343 return ERR_PTR(-EINVAL); 344 345 first_page_va = start & PAGE_MASK; 346 num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT; 347 num_chunks = ((num_pages - 1) >> CHUNK_SHIFT) + 1; 348 349 umem = kzalloc_flex(*umem, page_chunk, num_chunks); 350 if (!umem) 351 return ERR_PTR(-ENOMEM); 352 353 base_mem = ib_umem_get_va(base_dev, start, len, rights); 354 if (IS_ERR(base_mem)) { 355 rv = PTR_ERR(base_mem); 356 siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv); 357 goto err_out; 358 } 359 umem->fp_addr = first_page_va; 360 umem->base_mem = base_mem; 361 umem->num_pages = num_pages; 362 umem->num_chunks = num_chunks; 363 364 sgt = &base_mem->sgt_append.sgt; 365 __sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0); 366 367 for (i = 0; i < num_chunks; i++) { 368 struct page **plist; 369 unsigned int pix, nents = min(num_pages, PAGES_PER_CHUNK); 370 371 plist = kzalloc_objs(struct page *, nents); 372 if (!plist) { 373 rv = -ENOMEM; 374 goto err_out; 375 } 376 umem->page_chunk[i].plist = plist; 377 378 for (pix = 0; pix < nents; pix++) { 379 if (!__sg_page_iter_next(&sg_iter)) 380 break; 381 plist[pix] = sg_page_iter_page(&sg_iter); 382 num_pages--; 383 } 384 } 385 386 if (num_pages) { 387 /* 388 * Unexpected size of sg list provided by ib_umem_get_va() 389 */ 390 siw_dbg(base_dev, "Short SG list, missing %u pages\n", 391 num_pages); 392 rv = -EINVAL; 393 goto err_out; 394 } 395 return umem; 396 err_out: 397 siw_umem_release(umem); 398 399 return ERR_PTR(rv); 400 } 401