/*
 * Copyright(c) 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include <rdma/rdma_vt.h>
#include "vt.h"
#include "mr.h"
#include "trace.h"

/**
 * rvt_driver_mr_init - Init MR resources per driver
 * @rdi: rvt dev struct
 *
 * Do any initialization needed when a driver registers with rdmavt.
 *
 * Return: 0 on success or errno on failure
 */
int rvt_driver_mr_init(struct rvt_dev_info *rdi)
{
	unsigned int lkey_table_size = rdi->dparms.lkey_table_size;
	unsigned lk_tab_size;
	int i;

	/*
	 * The top hfi1_lkey_table_size bits are used to index the
	 * table. The lower 8 bits can be owned by the user (copied from
	 * the LKEY). The remaining bits act as a generation number or tag.
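	 * For example, with lkey_table_size == 16 an allocated lkey is
	 * laid out as [31:16] table index, [15:8] generation, [7:0]
	 * user-owned bits.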
	 */
	if (!lkey_table_size)
		return -EINVAL;

	spin_lock_init(&rdi->lkey_table.lock);

	/* ensure generation is at least 4 bits */
	if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) {
		rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n",
			    lkey_table_size, RVT_MAX_LKEY_TABLE_BITS);
		rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS;
		lkey_table_size = rdi->dparms.lkey_table_size;
	}
	rdi->lkey_table.max = 1 << lkey_table_size;
	rdi->lkey_table.shift = 32 - lkey_table_size;
	lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
	rdi->lkey_table.table = (struct rvt_mregion __rcu **)
			       vmalloc_node(lk_tab_size, rdi->dparms.node);
	if (!rdi->lkey_table.table)
		return -ENOMEM;

	RCU_INIT_POINTER(rdi->dma_mr, NULL);
	for (i = 0; i < rdi->lkey_table.max; i++)
		RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL);

	return 0;
}

/**
 * rvt_mr_exit - clean up MR
 * @rdi: rvt dev structure
 *
 * Called when drivers have unregistered or perhaps failed to register with us.
 */
void rvt_mr_exit(struct rvt_dev_info *rdi)
{
	if (rdi->dma_mr)
		rvt_pr_err(rdi, "DMA MR not null!\n");

	vfree(rdi->lkey_table.table);
}

static void rvt_deinit_mregion(struct rvt_mregion *mr)
{
	int i = mr->mapsz;

	mr->mapsz = 0;
	while (i)
		kfree(mr->map[--i]);
}

static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
			    int count)
{
	int m, i = 0;
	struct rvt_dev_info *dev = ib_to_rvt(pd->device);

	mr->mapsz = 0;
	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
	for (; i < m; i++) {
		mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
					  dev->dparms.node);
		if (!mr->map[i]) {
			rvt_deinit_mregion(mr);
			return -ENOMEM;
		}
		mr->mapsz++;
	}
	init_completion(&mr->comp);
	/* count returning the ptr to user */
	atomic_set(&mr->refcount, 1);
	atomic_set(&mr->lkey_invalid, 0);
	mr->pd = pd;
	mr->max_segs = count;
	return 0;
}

/**
 * rvt_alloc_lkey - allocate an lkey
 * @mr: memory region that this lkey protects
 * @dma_region: 0->normal key, 1->restricted DMA key
 *
 * Returns 0 if successful, otherwise returns -errno.
 *
 * Increments mr reference count as required.
 *
 * Sets the lkey field of mr for non-dma regions.
 *
 */
static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
{
	unsigned long flags;
	u32 r;
	u32 n;
	int ret = 0;
	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;

	rvt_get_mr(mr);
	spin_lock_irqsave(&rkt->lock, flags);

	/* special case for dma_mr lkey == 0 */
	if (dma_region) {
		struct rvt_mregion *tmr;

		tmr = rcu_access_pointer(dev->dma_mr);
		if (!tmr) {
			rcu_assign_pointer(dev->dma_mr, mr);
			mr->lkey_published = 1;
		} else {
			rvt_put_mr(mr);
		}
		goto success;
	}

	/* Find the next available LKEY */
	r = rkt->next;
	n = r;
	for (;;) {
		if (!rcu_access_pointer(rkt->table[r]))
			break;
		r = (r + 1) & (rkt->max - 1);
		if (r == n)
			goto bail;
	}
	rkt->next = (r + 1) & (rkt->max - 1);
	/*
	 * Make sure lkey is never zero which is reserved to indicate an
	 * unrestricted LKEY.
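	 * The generation counter is bumped on every allocation so that a
	 * recycled table slot publishes a different lkey than the one it
	 * handed out previously.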
	 */
	rkt->gen++;
	/*
	 * bits are capped to ensure enough bits for generation number
	 */
	mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) |
		((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen)
		 << 8);
	if (mr->lkey == 0) {
		mr->lkey |= 1 << 8;
		rkt->gen++;
	}
	rcu_assign_pointer(rkt->table[r], mr);
	mr->lkey_published = 1;
success:
	spin_unlock_irqrestore(&rkt->lock, flags);
out:
	return ret;
bail:
	rvt_put_mr(mr);
	spin_unlock_irqrestore(&rkt->lock, flags);
	ret = -ENOMEM;
	goto out;
}

/**
 * rvt_free_lkey - free an lkey
 * @mr: mr to free from tables
 */
static void rvt_free_lkey(struct rvt_mregion *mr)
{
	unsigned long flags;
	u32 lkey = mr->lkey;
	u32 r;
	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;
	int freed = 0;

	spin_lock_irqsave(&rkt->lock, flags);
	if (!mr->lkey_published)
		goto out;
	if (lkey == 0) {
		RCU_INIT_POINTER(dev->dma_mr, NULL);
	} else {
		r = lkey >> (32 - dev->dparms.lkey_table_size);
		RCU_INIT_POINTER(rkt->table[r], NULL);
	}
	mr->lkey_published = 0;
	freed++;
out:
	spin_unlock_irqrestore(&rkt->lock, flags);
	if (freed) {
		synchronize_rcu();
		rvt_put_mr(mr);
	}
}

static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
{
	struct rvt_mr *mr;
	int rval = -ENOMEM;
	int m;

	/* Allocate struct plus pointers to first level page tables. */
	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
	if (!mr)
		goto bail;

	rval = rvt_init_mregion(&mr->mr, pd, count);
	if (rval)
		goto bail;
	/*
	 * ib_reg_phys_mr() will initialize mr->ibmr except for
	 * lkey and rkey.
	 */
	rval = rvt_alloc_lkey(&mr->mr, 0);
	if (rval)
		goto bail_mregion;
	mr->ibmr.lkey = mr->mr.lkey;
	mr->ibmr.rkey = mr->mr.lkey;
done:
	return mr;

bail_mregion:
	rvt_deinit_mregion(&mr->mr);
bail:
	kfree(mr);
	mr = ERR_PTR(rval);
	goto done;
}

static void __rvt_free_mr(struct rvt_mr *mr)
{
	/* Unpublish the lkey before tearing down the region. */
	rvt_free_lkey(&mr->mr);
	rvt_deinit_mregion(&mr->mr);
	kfree(mr);
}

/**
 * rvt_get_dma_mr - get a DMA memory region
 * @pd: protection domain for this memory region
 * @acc: access flags
 *
 * Return: the memory region on success, otherwise returns an errno.
 * Note that all DMA addresses should be created via the functions in
 * struct dma_virt_ops.
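 *
 * The DMA MR is published as dev->dma_mr with an lkey of zero and is
 * the region looked up by rvt_lkey_ok()/rvt_rkey_ok() when a zero
 * LKEY/RKEY is presented on a kernel PD.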
 */
struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct rvt_mr *mr;
	struct ib_mr *ret;
	int rval;

	if (ibpd_to_rvtpd(pd)->user)
		return ERR_PTR(-EPERM);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	rval = rvt_init_mregion(&mr->mr, pd, 0);
	if (rval) {
		ret = ERR_PTR(rval);
		goto bail;
	}

	rval = rvt_alloc_lkey(&mr->mr, 1);
	if (rval) {
		ret = ERR_PTR(rval);
		goto bail_mregion;
	}

	mr->mr.access_flags = acc;
	ret = &mr->ibmr;
done:
	return ret;

bail_mregion:
	rvt_deinit_mregion(&mr->mr);
bail:
	kfree(mr);
	goto done;
}

/**
 * rvt_reg_user_mr - register a userspace memory region
 * @pd: protection domain for this memory region
 * @start: starting userspace address
 * @length: length of region to register
 * @virt_addr: virtual address associated with this memory region
 * @mr_access_flags: access flags for this memory region
 * @udata: unused by the driver
 *
 * Return: the memory region on success, otherwise returns an errno.
 */
struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
			      u64 virt_addr, int mr_access_flags,
			      struct ib_udata *udata)
{
	struct rvt_mr *mr;
	struct ib_umem *umem;
	struct scatterlist *sg;
	int n, m, entry;
	struct ib_mr *ret;

	if (length == 0)
		return ERR_PTR(-EINVAL);

	umem = ib_umem_get(pd->uobject->context, start, length,
			   mr_access_flags, 0);
	if (IS_ERR(umem))
		return (void *)umem;

	n = umem->nmap;

	mr = __rvt_alloc_mr(n, pd);
	if (IS_ERR(mr)) {
		ret = (struct ib_mr *)mr;
		goto bail_umem;
	}

	mr->mr.user_base = start;
	mr->mr.iova = virt_addr;
	mr->mr.length = length;
	mr->mr.offset = ib_umem_offset(umem);
	mr->mr.access_flags = mr_access_flags;
	mr->umem = umem;

	if (is_power_of_2(umem->page_size))
		mr->mr.page_shift = ilog2(umem->page_size);
	m = 0;
	n = 0;
	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		void *vaddr;

		vaddr = page_address(sg_page(sg));
		if (!vaddr) {
			ret = ERR_PTR(-EINVAL);
			goto bail_inval;
		}
		mr->mr.map[m]->segs[n].vaddr = vaddr;
		mr->mr.map[m]->segs[n].length = umem->page_size;
		trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size);
		n++;
		if (n == RVT_SEGSZ) {
			m++;
			n = 0;
		}
	}
	return &mr->ibmr;

bail_inval:
	__rvt_free_mr(mr);

bail_umem:
	ib_umem_release(umem);

	return ret;
}

/**
 * rvt_dereg_mr - unregister and free a memory region
 * @ibmr: the memory region to free
 *
 * Note that this is called to free MRs created by rvt_get_dma_mr()
 * or rvt_reg_user_mr().
 *
 * Returns 0 on success.
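 *
 * The caller's reference is dropped here; if other references remain,
 * the code waits up to five seconds for them to be released and
 * returns -EBUSY if they are not.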
 */
int rvt_dereg_mr(struct ib_mr *ibmr)
{
	struct rvt_mr *mr = to_imr(ibmr);
	struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
	int ret = 0;
	unsigned long timeout;

	rvt_free_lkey(&mr->mr);

	rvt_put_mr(&mr->mr); /* will set completion if last */
	timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
	if (!timeout) {
		rvt_pr_err(rdi,
			   "rvt_dereg_mr timeout mr %p pd %p refcount %u\n",
			   mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
		rvt_get_mr(&mr->mr);
		ret = -EBUSY;
		goto out;
	}
	rvt_deinit_mregion(&mr->mr);
	if (mr->umem)
		ib_umem_release(mr->umem);
	kfree(mr);
out:
	return ret;
}

/**
 * rvt_alloc_mr - Allocate a memory region usable with a fast register
 * work request
 * @pd: protection domain for this memory region
 * @mr_type: mem region type
 * @max_num_sg: Max number of segments allowed
 *
 * Return: the memory region on success, otherwise return an errno.
 */
struct ib_mr *rvt_alloc_mr(struct ib_pd *pd,
			   enum ib_mr_type mr_type,
			   u32 max_num_sg)
{
	struct rvt_mr *mr;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = __rvt_alloc_mr(max_num_sg, pd);
	if (IS_ERR(mr))
		return (struct ib_mr *)mr;

	return &mr->ibmr;
}

/**
 * rvt_set_page - page assignment function called by ib_sg_to_pages
 * @ibmr: memory region
 * @addr: dma address of mapped page
 *
 * Return: 0 on success
 */
static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct rvt_mr *mr = to_imr(ibmr);
	u32 ps = 1 << mr->mr.page_shift;
	u32 mapped_segs = mr->mr.length >> mr->mr.page_shift;
	int m, n;

	if (unlikely(mapped_segs == mr->mr.max_segs))
		return -ENOMEM;

	if (mr->mr.length == 0) {
		mr->mr.user_base = addr;
		mr->mr.iova = addr;
	}

	m = mapped_segs / RVT_SEGSZ;
	n = mapped_segs % RVT_SEGSZ;
	mr->mr.map[m]->segs[n].vaddr = (void *)addr;
	mr->mr.map[m]->segs[n].length = ps;
	trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
	mr->mr.length += ps;

	return 0;
}

/**
 * rvt_map_mr_sg - map sg list and set it on the memory region
 * @ibmr: memory region
 * @sg: dma mapped scatterlist
 * @sg_nents: number of entries in sg
 * @sg_offset: offset in bytes into sg
 *
 * Return: number of sg elements mapped to the memory region
 */
int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
		  int sg_nents, unsigned int *sg_offset)
{
	struct rvt_mr *mr = to_imr(ibmr);

	mr->mr.length = 0;
	mr->mr.page_shift = PAGE_SHIFT;
	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
			      rvt_set_page);
}

/**
 * rvt_fast_reg_mr - fast register physical MR
 * @qp: the queue pair where the work request comes from
 * @ibmr: the memory region to be registered
 * @key: updated key for this memory region
 * @access: access flags for this memory region
 *
 * Returns 0 on success.
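 *
 * Only the low 8 user-owned bits of @key may differ from the currently
 * published lkey; the table index and generation bits must match.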
 */
int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
		    int access)
{
	struct rvt_mr *mr = to_imr(ibmr);

	if (qp->ibqp.pd != mr->mr.pd)
		return -EACCES;

	/* not applicable to dma MR or user MR */
	if (!mr->mr.lkey || mr->umem)
		return -EINVAL;

	if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00))
		return -EINVAL;

	ibmr->lkey = key;
	ibmr->rkey = key;
	mr->mr.lkey = key;
	mr->mr.access_flags = access;
	atomic_set(&mr->mr.lkey_invalid, 0);

	return 0;
}
EXPORT_SYMBOL(rvt_fast_reg_mr);

/**
 * rvt_invalidate_rkey - invalidate an MR rkey
 * @qp: queue pair associated with the invalidate op
 * @rkey: rkey to invalidate
 *
 * Returns 0 on success.
 */
int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey)
{
	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;
	struct rvt_mregion *mr;

	if (rkey == 0)
		return -EINVAL;

	rcu_read_lock();
	mr = rcu_dereference(
		rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
		goto bail;

	atomic_set(&mr->lkey_invalid, 1);
	rcu_read_unlock();
	return 0;

bail:
	rcu_read_unlock();
	return -EINVAL;
}
EXPORT_SYMBOL(rvt_invalidate_rkey);

/**
 * rvt_alloc_fmr - allocate a fast memory region
 * @pd: the protection domain for this memory region
 * @mr_access_flags: access flags for this memory region
 * @fmr_attr: fast memory region attributes
 *
 * Return: the memory region on success, otherwise returns an errno.
 */
struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
			     struct ib_fmr_attr *fmr_attr)
{
	struct rvt_fmr *fmr;
	int m;
	struct ib_fmr *ret;
	int rval = -ENOMEM;

	/* Allocate struct plus pointers to first level page tables. */
	m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ;
	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
	if (!fmr)
		goto bail;

	rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
	if (rval)
		goto bail;

	/*
	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
	 * rkey.
	 */
	rval = rvt_alloc_lkey(&fmr->mr, 0);
	if (rval)
		goto bail_mregion;
	fmr->ibfmr.rkey = fmr->mr.lkey;
	fmr->ibfmr.lkey = fmr->mr.lkey;
	/*
	 * Resources are allocated but no valid mapping (RKEY can't be
	 * used).
	 */
	fmr->mr.access_flags = mr_access_flags;
	fmr->mr.max_segs = fmr_attr->max_pages;
	fmr->mr.page_shift = fmr_attr->page_shift;

	ret = &fmr->ibfmr;
done:
	return ret;

bail_mregion:
	rvt_deinit_mregion(&fmr->mr);
bail:
	kfree(fmr);
	ret = ERR_PTR(rval);
	goto done;
}

/**
 * rvt_map_phys_fmr - set up a fast memory region
 * @ibfmr: the fast memory region to set up
 * @page_list: the list of pages to associate with the fast memory region
 * @list_len: the number of pages to associate with the fast memory region
 * @iova: the virtual address of the start of the fast memory region
 *
 * This may be called from interrupt context.
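 *
 * Returns -EBUSY if the FMR still appears to be in use, i.e. its
 * reference count is above the two references held by allocation and
 * the published lkey.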
 *
 * Return: 0 on success
 */
int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
		     int list_len, u64 iova)
{
	struct rvt_fmr *fmr = to_ifmr(ibfmr);
	struct rvt_lkey_table *rkt;
	unsigned long flags;
	int m, n, i;
	u32 ps;
	struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);

	i = atomic_read(&fmr->mr.refcount);
	if (i > 2)
		return -EBUSY;

	if (list_len > fmr->mr.max_segs)
		return -EINVAL;

	rkt = &rdi->lkey_table;
	spin_lock_irqsave(&rkt->lock, flags);
	fmr->mr.user_base = iova;
	fmr->mr.iova = iova;
	ps = 1 << fmr->mr.page_shift;
	fmr->mr.length = list_len * ps;
	m = 0;
	n = 0;
	for (i = 0; i < list_len; i++) {
		fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
		fmr->mr.map[m]->segs[n].length = ps;
		trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps);
		if (++n == RVT_SEGSZ) {
			m++;
			n = 0;
		}
	}
	spin_unlock_irqrestore(&rkt->lock, flags);
	return 0;
}

/**
 * rvt_unmap_fmr - unmap fast memory regions
 * @fmr_list: the list of fast memory regions to unmap
 *
 * Return: 0 on success.
 */
int rvt_unmap_fmr(struct list_head *fmr_list)
{
	struct rvt_fmr *fmr;
	struct rvt_lkey_table *rkt;
	unsigned long flags;
	struct rvt_dev_info *rdi;

	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
		rdi = ib_to_rvt(fmr->ibfmr.device);
		rkt = &rdi->lkey_table;
		spin_lock_irqsave(&rkt->lock, flags);
		fmr->mr.user_base = 0;
		fmr->mr.iova = 0;
		fmr->mr.length = 0;
		spin_unlock_irqrestore(&rkt->lock, flags);
	}
	return 0;
}

/**
 * rvt_dealloc_fmr - deallocate a fast memory region
 * @ibfmr: the fast memory region to deallocate
 *
 * Return: 0 on success.
 */
int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
{
	struct rvt_fmr *fmr = to_ifmr(ibfmr);
	int ret = 0;
	unsigned long timeout;

	rvt_free_lkey(&fmr->mr);
	rvt_put_mr(&fmr->mr); /* will set completion if last */
	timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ);
	if (!timeout) {
		rvt_get_mr(&fmr->mr);
		ret = -EBUSY;
		goto out;
	}
	rvt_deinit_mregion(&fmr->mr);
	kfree(fmr);
out:
	return ret;
}

/**
 * rvt_lkey_ok - check IB SGE for validity and initialize
 * @rkt: table containing lkey to check SGE against
 * @pd: protection domain
 * @isge: outgoing internal SGE
 * @sge: SGE to check
 * @acc: access flags
 *
 * Check the IB SGE for validity and initialize our internal version
 * of it.
 *
 * Return: 1 if valid and successful, otherwise returns 0.
 *
 * Increments the reference count upon success.
 *
 */
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
		struct rvt_sge *isge, struct ib_sge *sge, int acc)
{
	struct rvt_mregion *mr;
	unsigned n, m;
	size_t off;

	/*
	 * We use LKEY == zero for kernel virtual addresses
	 * (see rvt_get_dma_mr() and dma_virt_ops).
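	 * A zero LKEY is only honored for kernel PDs; a user PD must
	 * always present a published lkey.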
	 */
	rcu_read_lock();
	if (sge->lkey == 0) {
		struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);

		if (pd->user)
			goto bail;
		mr = rcu_dereference(dev->dma_mr);
		if (!mr)
			goto bail;
		rvt_get_mr(mr);
		rcu_read_unlock();

		isge->mr = mr;
		isge->vaddr = (void *)sge->addr;
		isge->length = sge->length;
		isge->sge_length = sge->length;
		isge->m = 0;
		isge->n = 0;
		goto ok;
	}
	mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
	if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
		     mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
		goto bail;

	off = sge->addr - mr->user_base;
	if (unlikely(sge->addr < mr->user_base ||
		     off + sge->length > mr->length ||
		     (mr->access_flags & acc) != acc))
		goto bail;
	rvt_get_mr(mr);
	rcu_read_unlock();

	off += mr->offset;
	if (mr->page_shift) {
		/*
		 * Page sizes are a uniform power of 2 so no loop is
		 * necessary. entries_spanned_by_off is the number of times
		 * the loop below would have executed.
		 */
		size_t entries_spanned_by_off;

		entries_spanned_by_off = off >> mr->page_shift;
		off -= (entries_spanned_by_off << mr->page_shift);
		m = entries_spanned_by_off / RVT_SEGSZ;
		n = entries_spanned_by_off % RVT_SEGSZ;
	} else {
		m = 0;
		n = 0;
		while (off >= mr->map[m]->segs[n].length) {
			off -= mr->map[m]->segs[n].length;
			n++;
			if (n >= RVT_SEGSZ) {
				m++;
				n = 0;
			}
		}
	}
	isge->mr = mr;
	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
	isge->length = mr->map[m]->segs[n].length - off;
	isge->sge_length = sge->length;
	isge->m = m;
	isge->n = n;
ok:
	return 1;
bail:
	rcu_read_unlock();
	return 0;
}
EXPORT_SYMBOL(rvt_lkey_ok);

/**
 * rvt_rkey_ok - check the IB virtual address, length, and RKEY
 * @qp: qp for validation
 * @sge: SGE state
 * @len: length of data
 * @vaddr: virtual address to place data
 * @rkey: rkey to check
 * @acc: access flags
 *
 * Return: 1 if successful, otherwise 0.
 *
 * Increments the reference count upon success.
 */
int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
		u32 len, u64 vaddr, u32 rkey, int acc)
{
	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;
	struct rvt_mregion *mr;
	unsigned n, m;
	size_t off;

	/*
	 * We use RKEY == zero for kernel virtual addresses
	 * (see rvt_get_dma_mr() and dma_virt_ops).
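	 * As in rvt_lkey_ok(), a zero RKEY is only honored for kernel PDs.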
	 */
	rcu_read_lock();
	if (rkey == 0) {
		struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
		struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device);

		if (pd->user)
			goto bail;
		mr = rcu_dereference(rdi->dma_mr);
		if (!mr)
			goto bail;
		rvt_get_mr(mr);
		rcu_read_unlock();

		sge->mr = mr;
		sge->vaddr = (void *)vaddr;
		sge->length = len;
		sge->sge_length = len;
		sge->m = 0;
		sge->n = 0;
		goto ok;
	}

	mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
	if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
		     mr->lkey != rkey || qp->ibqp.pd != mr->pd))
		goto bail;

	off = vaddr - mr->iova;
	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
		     (mr->access_flags & acc) == 0))
		goto bail;
	rvt_get_mr(mr);
	rcu_read_unlock();

	off += mr->offset;
	if (mr->page_shift) {
		/*
		 * Page sizes are a uniform power of 2 so no loop is
		 * necessary. entries_spanned_by_off is the number of times
		 * the loop below would have executed.
		 */
		size_t entries_spanned_by_off;

		entries_spanned_by_off = off >> mr->page_shift;
		off -= (entries_spanned_by_off << mr->page_shift);
		m = entries_spanned_by_off / RVT_SEGSZ;
		n = entries_spanned_by_off % RVT_SEGSZ;
	} else {
		m = 0;
		n = 0;
		while (off >= mr->map[m]->segs[n].length) {
			off -= mr->map[m]->segs[n].length;
			n++;
			if (n >= RVT_SEGSZ) {
				m++;
				n = 0;
			}
		}
	}
	sge->mr = mr;
	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
	sge->length = mr->map[m]->segs[n].length - off;
	sge->sge_length = len;
	sge->m = m;
	sge->n = n;
ok:
	return 1;
bail:
	rcu_read_unlock();
	return 0;
}
EXPORT_SYMBOL(rvt_rkey_ok);