1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 3 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ 4 /* Copyright (c) 2008-2019, IBM Corporation */ 5 6 #include <linux/gfp.h> 7 #include <rdma/ib_verbs.h> 8 #include <rdma/ib_umem.h> 9 #include <linux/dma-mapping.h> 10 #include <linux/slab.h> 11 #include <linux/sched/mm.h> 12 #include <linux/resource.h> 13 14 #include "siw.h" 15 #include "siw_mem.h" 16 17 /* Stag lookup is based on its index part only (24 bits). */ 18 #define SIW_STAG_MAX_INDEX 0x00ffffff 19 20 /* 21 * The code avoids special Stag of zero and tries to randomize 22 * STag values between 1 and SIW_STAG_MAX_INDEX. 23 */ 24 int siw_mem_add(struct siw_device *sdev, struct siw_mem *m) 25 { 26 struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX); 27 u32 id, next; 28 29 get_random_bytes(&next, 4); 30 next &= SIW_STAG_MAX_INDEX; 31 32 if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next, 33 GFP_KERNEL) < 0) 34 return -ENOMEM; 35 36 /* Set the STag index part */ 37 m->stag = id << 8; 38 39 siw_dbg_mem(m, "new MEM object\n"); 40 41 return 0; 42 } 43 44 /* 45 * siw_mem_id2obj() 46 * 47 * resolves memory from stag given by id. might be called from: 48 * o process context before sending out of sgl, or 49 * o in softirq when resolving target memory 50 */ 51 struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index) 52 { 53 struct siw_mem *mem; 54 55 rcu_read_lock(); 56 mem = xa_load(&sdev->mem_xa, stag_index); 57 if (likely(mem && kref_get_unless_zero(&mem->ref))) { 58 rcu_read_unlock(); 59 return mem; 60 } 61 rcu_read_unlock(); 62 63 return NULL; 64 } 65 66 void siw_umem_release(struct siw_umem *umem) 67 { 68 int i, num_pages = umem->num_pages; 69 70 if (umem->base_mem) 71 ib_umem_release(umem->base_mem); 72 73 for (i = 0; num_pages > 0; i++) { 74 kfree(umem->page_chunk[i].plist); 75 num_pages -= PAGES_PER_CHUNK; 76 } 77 kfree(umem->page_chunk); 78 kfree(umem); 79 } 80 81 int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj, 82 u64 start, u64 len, int rights) 83 { 84 struct siw_device *sdev = to_siw_dev(pd->device); 85 struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL); 86 struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX); 87 u32 id, next; 88 89 if (!mem) 90 return -ENOMEM; 91 92 mem->mem_obj = mem_obj; 93 mem->stag_valid = 0; 94 mem->sdev = sdev; 95 mem->va = start; 96 mem->len = len; 97 mem->pd = pd; 98 mem->perms = rights & IWARP_ACCESS_MASK; 99 kref_init(&mem->ref); 100 101 get_random_bytes(&next, 4); 102 next &= SIW_STAG_MAX_INDEX; 103 104 if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next, 105 GFP_KERNEL) < 0) { 106 kfree(mem); 107 return -ENOMEM; 108 } 109 110 mr->mem = mem; 111 /* Set the STag index part */ 112 mem->stag = id << 8; 113 mr->base_mr.lkey = mr->base_mr.rkey = mem->stag; 114 115 return 0; 116 } 117 118 void siw_mr_drop_mem(struct siw_mr *mr) 119 { 120 struct siw_mem *mem = mr->mem, *found; 121 122 mem->stag_valid = 0; 123 124 /* make STag invalid visible asap */ 125 smp_mb(); 126 127 found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8); 128 WARN_ON(found != mem); 129 siw_mem_put(mem); 130 } 131 132 void siw_free_mem(struct kref *ref) 133 { 134 struct siw_mem *mem = container_of(ref, struct siw_mem, ref); 135 136 siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n"); 137 138 if (!mem->is_mw && mem->mem_obj) { 139 if (mem->is_pbl == 0) 140 siw_umem_release(mem->umem); 141 else 142 kfree(mem->pbl); 143 } 144 kfree(mem); 145 } 146 147 /* 148 * siw_check_mem() 149 * 150 * Check protection domain, STAG state, access permissions and 151 * address range for memory object. 152 * 153 * @pd: Protection Domain memory should belong to 154 * @mem: memory to be checked 155 * @addr: starting addr of mem 156 * @perms: requested access permissions 157 * @len: len of memory interval to be checked 158 * 159 */ 160 int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr, 161 enum ib_access_flags perms, int len) 162 { 163 if (!mem->stag_valid) { 164 siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag); 165 return -E_STAG_INVALID; 166 } 167 if (mem->pd != pd) { 168 siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag); 169 return -E_PD_MISMATCH; 170 } 171 /* 172 * check access permissions 173 */ 174 if ((mem->perms & perms) < perms) { 175 siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n", 176 mem->perms, perms); 177 return -E_ACCESS_PERM; 178 } 179 /* 180 * Check if access falls into valid memory interval. 181 */ 182 if (addr < mem->va || addr + len > mem->va + mem->len) { 183 siw_dbg_pd(pd, "MEM interval len %d\n", len); 184 siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n", 185 (void *)(uintptr_t)addr, 186 (void *)(uintptr_t)(addr + len)); 187 siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n", 188 (void *)(uintptr_t)mem->va, 189 (void *)(uintptr_t)(mem->va + mem->len), 190 mem->stag); 191 192 return -E_BASE_BOUNDS; 193 } 194 return E_ACCESS_OK; 195 } 196 197 /* 198 * siw_check_sge() 199 * 200 * Check SGE for access rights in given interval 201 * 202 * @pd: Protection Domain memory should belong to 203 * @sge: SGE to be checked 204 * @mem: location of memory reference within array 205 * @perms: requested access permissions 206 * @off: starting offset in SGE 207 * @len: len of memory interval to be checked 208 * 209 * NOTE: Function references SGE's memory object (mem->obj) 210 * if not yet done. New reference is kept if check went ok and 211 * released if check failed. If mem->obj is already valid, no new 212 * lookup is being done and mem is not released it check fails. 213 */ 214 int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[], 215 enum ib_access_flags perms, u32 off, int len) 216 { 217 struct siw_device *sdev = to_siw_dev(pd->device); 218 struct siw_mem *new = NULL; 219 int rv = E_ACCESS_OK; 220 221 if (len + off > sge->length) { 222 rv = -E_BASE_BOUNDS; 223 goto fail; 224 } 225 if (*mem == NULL) { 226 new = siw_mem_id2obj(sdev, sge->lkey >> 8); 227 if (unlikely(!new)) { 228 siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey); 229 rv = -E_STAG_INVALID; 230 goto fail; 231 } 232 *mem = new; 233 } 234 /* Check if user re-registered with different STag key */ 235 if (unlikely((*mem)->stag != sge->lkey)) { 236 siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey); 237 rv = -E_STAG_INVALID; 238 goto fail; 239 } 240 rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len); 241 if (unlikely(rv)) 242 goto fail; 243 244 return 0; 245 246 fail: 247 if (new) { 248 *mem = NULL; 249 siw_mem_put(new); 250 } 251 return rv; 252 } 253 254 void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op) 255 { 256 switch (op) { 257 case SIW_OP_SEND: 258 case SIW_OP_WRITE: 259 case SIW_OP_SEND_WITH_IMM: 260 case SIW_OP_SEND_REMOTE_INV: 261 case SIW_OP_READ: 262 case SIW_OP_READ_LOCAL_INV: 263 if (!(wqe->sqe.flags & SIW_WQE_INLINE)) 264 siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge); 265 break; 266 267 case SIW_OP_RECEIVE: 268 siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge); 269 break; 270 271 case SIW_OP_READ_RESPONSE: 272 siw_unref_mem_sgl(wqe->mem, 1); 273 break; 274 275 default: 276 /* 277 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR 278 * do not hold memory references 279 */ 280 break; 281 } 282 } 283 284 int siw_invalidate_stag(struct ib_pd *pd, u32 stag) 285 { 286 struct siw_device *sdev = to_siw_dev(pd->device); 287 struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8); 288 int rv = 0; 289 290 if (unlikely(!mem)) { 291 siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag); 292 return -EINVAL; 293 } 294 if (unlikely(mem->pd != pd)) { 295 siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag); 296 rv = -EACCES; 297 goto out; 298 } 299 /* 300 * Per RDMA verbs definition, an STag may already be in invalid 301 * state if invalidation is requested. So no state check here. 302 */ 303 mem->stag_valid = 0; 304 305 siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag); 306 out: 307 siw_mem_put(mem); 308 return rv; 309 } 310 311 /* 312 * Gets physical address backed by PBL element. Address is referenced 313 * by linear byte offset into list of variably sized PB elements. 314 * Optionally, provides remaining len within current element, and 315 * current PBL index for later resume at same element. 316 */ 317 dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx) 318 { 319 int i = idx ? *idx : 0; 320 321 while (i < pbl->num_buf) { 322 struct siw_pble *pble = &pbl->pbe[i]; 323 324 if (pble->pbl_off + pble->size > off) { 325 u64 pble_off = off - pble->pbl_off; 326 327 if (len) 328 *len = pble->size - pble_off; 329 if (idx) 330 *idx = i; 331 332 return pble->addr + pble_off; 333 } 334 i++; 335 } 336 if (len) 337 *len = 0; 338 return 0; 339 } 340 341 struct siw_pbl *siw_pbl_alloc(u32 num_buf) 342 { 343 struct siw_pbl *pbl; 344 345 if (num_buf == 0) 346 return ERR_PTR(-EINVAL); 347 348 pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL); 349 if (!pbl) 350 return ERR_PTR(-ENOMEM); 351 352 pbl->max_buf = num_buf; 353 354 return pbl; 355 } 356 357 struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start, 358 u64 len, int rights) 359 { 360 struct siw_umem *umem; 361 struct ib_umem *base_mem; 362 struct sg_page_iter sg_iter; 363 struct sg_table *sgt; 364 u64 first_page_va; 365 int num_pages, num_chunks, i, rv = 0; 366 367 if (!len) 368 return ERR_PTR(-EINVAL); 369 370 first_page_va = start & PAGE_MASK; 371 num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT; 372 num_chunks = (num_pages >> CHUNK_SHIFT) + 1; 373 374 umem = kzalloc(sizeof(*umem), GFP_KERNEL); 375 if (!umem) 376 return ERR_PTR(-ENOMEM); 377 378 umem->page_chunk = 379 kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL); 380 if (!umem->page_chunk) { 381 rv = -ENOMEM; 382 goto err_out; 383 } 384 base_mem = ib_umem_get(base_dev, start, len, rights); 385 if (IS_ERR(base_mem)) { 386 rv = PTR_ERR(base_mem); 387 siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv); 388 goto err_out; 389 } 390 umem->fp_addr = first_page_va; 391 umem->base_mem = base_mem; 392 393 sgt = &base_mem->sgt_append.sgt; 394 __sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0); 395 396 if (!__sg_page_iter_next(&sg_iter)) { 397 rv = -EINVAL; 398 goto err_out; 399 } 400 for (i = 0; num_pages > 0; i++) { 401 int nents = min_t(int, num_pages, PAGES_PER_CHUNK); 402 struct page **plist = 403 kcalloc(nents, sizeof(struct page *), GFP_KERNEL); 404 405 if (!plist) { 406 rv = -ENOMEM; 407 goto err_out; 408 } 409 umem->page_chunk[i].plist = plist; 410 while (nents--) { 411 *plist = sg_page_iter_page(&sg_iter); 412 umem->num_pages++; 413 num_pages--; 414 plist++; 415 if (!__sg_page_iter_next(&sg_iter)) 416 break; 417 } 418 } 419 return umem; 420 err_out: 421 siw_umem_release(umem); 422 423 return ERR_PTR(rv); 424 } 425