// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include <linux/gfp.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/resource.h>

#include "siw.h"
#include "siw_mem.h"

/* Stag lookup is based on its index part only (24 bits). */
#define SIW_STAG_MAX_INDEX	0x00ffffff

/*
 * siw_mem_id2obj()
 *
 * resolves memory from stag given by id. might be called from:
 * o process context before sending out of sgl, or
 * o in softirq when resolving target memory
 */
struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
{
        struct siw_mem *mem;

        rcu_read_lock();
        mem = xa_load(&sdev->mem_xa, stag_index);
        if (likely(mem && kref_get_unless_zero(&mem->ref))) {
                rcu_read_unlock();
                return mem;
        }
        rcu_read_unlock();

        return NULL;
}

void siw_umem_release(struct siw_umem *umem)
{
        int i, num_pages = umem->num_pages;

        if (umem->base_mem)
                ib_umem_release(umem->base_mem);

        for (i = 0; num_pages > 0; i++) {
                kfree(umem->page_chunk[i].plist);
                num_pages -= PAGES_PER_CHUNK;
        }
        kfree(umem->page_chunk);
        kfree(umem);
}

int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
                   u64 start, u64 len, int rights)
{
        struct siw_device *sdev = to_siw_dev(pd->device);
        struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
        struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
        u32 id, next;

        if (!mem)
                return -ENOMEM;

        mem->mem_obj = mem_obj;
        mem->stag_valid = 0;
        mem->sdev = sdev;
        mem->va = start;
        mem->len = len;
        mem->pd = pd;
        mem->perms = rights & IWARP_ACCESS_MASK;
        kref_init(&mem->ref);

        get_random_bytes(&next, 4);
        next &= SIW_STAG_MAX_INDEX;

        if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
                            GFP_KERNEL) < 0) {
                kfree(mem);
                return -ENOMEM;
        }

        mr->mem = mem;
        /* Set the STag index part */
        mem->stag = id << 8;
        mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;

        return 0;
}
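
/*
 * Illustrative sketch only, not used by the driver: how the STag set up
 * in siw_mr_add_mem() above decomposes. The xarray id occupies the upper
 * 24 bits (the index part, bounded by SIW_STAG_MAX_INDEX); the low byte
 * is the 8-bit STag key part, which is zero right after registration
 * since mem->stag = id << 8. Lookups consume the index part, i.e.
 * stag >> 8. The helper names below are made up for this example.
 */
static inline u32 __maybe_unused siw_example_stag_index(u32 stag)
{
        /* Index part, as passed to siw_mem_id2obj() */
        return stag >> 8;
}

static inline u8 __maybe_unused siw_example_stag_key(u32 stag)
{
        /* 8-bit key part in the low byte of the STag */
        return stag & 0xff;
}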

void siw_mr_drop_mem(struct siw_mr *mr)
{
        struct siw_mem *mem = mr->mem, *found;

        mem->stag_valid = 0;

        /* make STag invalid visible asap */
        smp_mb();

        found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
        WARN_ON(found != mem);
        siw_mem_put(mem);
}

void siw_free_mem(struct kref *ref)
{
        struct siw_mem *mem = container_of(ref, struct siw_mem, ref);

        siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");

        if (!mem->is_mw && mem->mem_obj) {
                if (mem->is_pbl == 0)
                        siw_umem_release(mem->umem);
                else
                        kfree(mem->pbl);
        }
        kfree(mem);
}

/*
 * siw_check_mem()
 *
 * Check protection domain, STag state, access permissions and
 * address range for memory object.
 *
 * @pd: Protection Domain memory should belong to
 * @mem: memory to be checked
 * @addr: starting addr of mem
 * @perms: requested access permissions
 * @len: len of memory interval to be checked
 */
int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
                  enum ib_access_flags perms, int len)
{
        if (!mem->stag_valid) {
                siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
                return -E_STAG_INVALID;
        }
        if (mem->pd != pd) {
                siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
                return -E_PD_MISMATCH;
        }
        /*
         * check access permissions
         */
        if ((mem->perms & perms) < perms) {
                siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
                           mem->perms, perms);
                return -E_ACCESS_PERM;
        }
        /*
         * Check if access falls into valid memory interval.
         */
        if (addr < mem->va || addr + len > mem->va + mem->len) {
                siw_dbg_pd(pd, "MEM interval len %d\n", len);
                siw_dbg_pd(pd, "[0x%p, 0x%p] out of bounds\n",
                           (void *)(uintptr_t)addr,
                           (void *)(uintptr_t)(addr + len));
                siw_dbg_pd(pd, "[0x%p, 0x%p] STag=0x%08x\n",
                           (void *)(uintptr_t)mem->va,
                           (void *)(uintptr_t)(mem->va + mem->len),
                           mem->stag);

                return -E_BASE_BOUNDS;
        }
        return E_ACCESS_OK;
}
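
/*
 * Minimal usage sketch, illustrative only: validating an address range
 * against an already referenced memory object before local write access.
 * Return values are the iWARP-style -E_* codes produced by
 * siw_check_mem() above; the wrapper name is made up for this example.
 */
static inline int __maybe_unused
siw_example_check_write(struct ib_pd *pd, struct siw_mem *mem,
                        u64 addr, int len)
{
        /*
         * E_ACCESS_OK (0) only if the STag is valid, the PD matches,
         * IB_ACCESS_LOCAL_WRITE is covered by mem->perms, and
         * [addr, addr + len) lies within [mem->va, mem->va + mem->len).
         */
        return siw_check_mem(pd, mem, addr, IB_ACCESS_LOCAL_WRITE, len);
}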

/*
 * siw_check_sge()
 *
 * Check SGE for access rights in given interval
 *
 * @pd: Protection Domain memory should belong to
 * @sge: SGE to be checked
 * @mem: location of memory reference within array
 * @perms: requested access permissions
 * @off: starting offset in SGE
 * @len: len of memory interval to be checked
 *
 * NOTE: Function references SGE's memory object (mem->obj)
 * if not yet done. New reference is kept if check went ok and
 * released if check failed. If mem->obj is already valid, no new
 * lookup is done and mem is not released if the check fails.
 */
int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
                  enum ib_access_flags perms, u32 off, int len)
{
        struct siw_device *sdev = to_siw_dev(pd->device);
        struct siw_mem *new = NULL;
        int rv = E_ACCESS_OK;

        if (len + off > sge->length) {
                rv = -E_BASE_BOUNDS;
                goto fail;
        }
        if (*mem == NULL) {
                new = siw_mem_id2obj(sdev, sge->lkey >> 8);
                if (unlikely(!new)) {
                        siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
                        rv = -E_STAG_INVALID;
                        goto fail;
                }
                *mem = new;
        }
        /* Check if user re-registered with different STag key */
        if (unlikely((*mem)->stag != sge->lkey)) {
                siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
                rv = -E_STAG_INVALID;
                goto fail;
        }
        rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
        if (unlikely(rv))
                goto fail;

        return 0;

fail:
        if (new) {
                *mem = NULL;
                siw_mem_put(new);
        }
        return rv;
}

void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
{
        switch (op) {
        case SIW_OP_SEND:
        case SIW_OP_WRITE:
        case SIW_OP_SEND_WITH_IMM:
        case SIW_OP_SEND_REMOTE_INV:
        case SIW_OP_READ:
        case SIW_OP_READ_LOCAL_INV:
                if (!(wqe->sqe.flags & SIW_WQE_INLINE))
                        siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
                break;

        case SIW_OP_RECEIVE:
                siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
                break;

        case SIW_OP_READ_RESPONSE:
                siw_unref_mem_sgl(wqe->mem, 1);
                break;

        default:
                /*
                 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
                 * do not hold memory references
                 */
                break;
        }
}

int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
{
        struct siw_device *sdev = to_siw_dev(pd->device);
        struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
        int rv = 0;

        if (unlikely(!mem)) {
                siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
                return -EINVAL;
        }
        if (unlikely(mem->pd != pd)) {
                siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
                rv = -EACCES;
                goto out;
        }
        /*
         * Per RDMA verbs definition, an STag may already be in invalid
         * state if invalidation is requested. So no state check here.
         */
        mem->stag_valid = 0;

        siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
out:
        siw_mem_put(mem);
        return rv;
}
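
/*
 * Illustrative sketch only: typical pairing of siw_check_sge() and
 * siw_wqe_put_mem() above. On the first successful check the mem[] slot
 * gains a reference via siw_mem_id2obj(); it is kept across further
 * checks of the same SGE and dropped later through siw_unref_mem_sgl()
 * when the WQE completes. The wrapper below is made up for this example.
 */
static int __maybe_unused
siw_example_access_sge(struct ib_pd *pd, struct siw_sge *sge,
                       struct siw_mem **mem_slot)
{
        /*
         * On success a reference is held in *mem_slot; on failure the
         * slot is left NULL if it was NULL before the call.
         */
        return siw_check_sge(pd, sge, mem_slot, IB_ACCESS_LOCAL_WRITE, 0,
                             sge->length);
}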

/*
 * Gets physical address backed by PBL element. Address is referenced
 * by linear byte offset into list of variably sized PB elements.
 * Optionally, provides remaining len within current element, and
 * current PBL index for later resume at same element.
 */
dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
{
        int i = idx ? *idx : 0;

        while (i < pbl->num_buf) {
                struct siw_pble *pble = &pbl->pbe[i];

                if (pble->pbl_off + pble->size > off) {
                        u64 pble_off = off - pble->pbl_off;

                        if (len)
                                *len = pble->size - pble_off;
                        if (idx)
                                *idx = i;

                        return pble->addr + pble_off;
                }
                i++;
        }
        if (len)
                *len = 0;
        return 0;
}

struct siw_pbl *siw_pbl_alloc(u32 num_buf)
{
        struct siw_pbl *pbl;

        if (num_buf == 0)
                return ERR_PTR(-EINVAL);

        pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL);
        if (!pbl)
                return ERR_PTR(-ENOMEM);

        pbl->max_buf = num_buf;

        return pbl;
}

struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
                              u64 len, int rights)
{
        struct siw_umem *umem;
        struct ib_umem *base_mem;
        struct sg_page_iter sg_iter;
        struct sg_table *sgt;
        u64 first_page_va;
        int num_pages, num_chunks, i, rv = 0;

        if (!len)
                return ERR_PTR(-EINVAL);

        first_page_va = start & PAGE_MASK;
        num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
        num_chunks = (num_pages >> CHUNK_SHIFT) + 1;

        umem = kzalloc(sizeof(*umem), GFP_KERNEL);
        if (!umem)
                return ERR_PTR(-ENOMEM);

        umem->page_chunk =
                kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
        if (!umem->page_chunk) {
                rv = -ENOMEM;
                goto err_out;
        }
        base_mem = ib_umem_get(base_dev, start, len, rights);
        if (IS_ERR(base_mem)) {
                rv = PTR_ERR(base_mem);
                siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv);
                goto err_out;
        }
        umem->fp_addr = first_page_va;
        umem->base_mem = base_mem;

        sgt = &base_mem->sgt_append.sgt;
        __sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);

        if (!__sg_page_iter_next(&sg_iter)) {
                rv = -EINVAL;
                goto err_out;
        }
        for (i = 0; num_pages > 0; i++) {
                int nents = min_t(int, num_pages, PAGES_PER_CHUNK);
                struct page **plist =
                        kcalloc(nents, sizeof(struct page *), GFP_KERNEL);

                if (!plist) {
                        rv = -ENOMEM;
                        goto err_out;
                }
                umem->page_chunk[i].plist = plist;
                while (nents--) {
                        *plist = sg_page_iter_page(&sg_iter);
                        umem->num_pages++;
                        num_pages--;
                        plist++;
                        if (!__sg_page_iter_next(&sg_iter))
                                break;
                }
        }
        return umem;
err_out:
        siw_umem_release(umem);

        return ERR_PTR(rv);
}
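
/*
 * Illustrative sketch only, not part of the driver: how the chunked page
 * list built by siw_umem_get() above is indexed. A page number splits
 * into a chunk index and a slot within that chunk's plist, with
 * PAGES_PER_CHUNK being the chunk capacity used when the chunks were
 * filled. The helper name is made up for this example.
 */
static inline struct page * __maybe_unused
siw_example_get_upage(struct siw_umem *umem, int page_idx)
{
        int chunk = page_idx / PAGES_PER_CHUNK;
        int slot = page_idx % PAGES_PER_CHUNK;

        if (page_idx < 0 || page_idx >= umem->num_pages)
                return NULL;

        return umem->page_chunk[chunk].plist[slot];
}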