1 /* 2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34 #include "rxe.h" 35 #include "rxe_loc.h" 36 37 /* 38 * lfsr (linear feedback shift register) with period 255 39 */ 40 static u8 rxe_get_key(void) 41 { 42 static u32 key = 1; 43 44 key = key << 1; 45 46 key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10)) 47 ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40)); 48 49 key &= 0xff; 50 51 return key; 52 } 53 54 int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length) 55 { 56 switch (mem->type) { 57 case RXE_MEM_TYPE_DMA: 58 return 0; 59 60 case RXE_MEM_TYPE_MR: 61 case RXE_MEM_TYPE_FMR: 62 if (iova < mem->iova || 63 length > mem->length || 64 iova > mem->iova + mem->length - length) 65 return -EFAULT; 66 return 0; 67 68 default: 69 return -EFAULT; 70 } 71 } 72 73 #define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \ 74 | IB_ACCESS_REMOTE_WRITE \ 75 | IB_ACCESS_REMOTE_ATOMIC) 76 77 static void rxe_mem_init(int access, struct rxe_mem *mem) 78 { 79 u32 lkey = mem->pelem.index << 8 | rxe_get_key(); 80 u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0; 81 82 if (mem->pelem.pool->type == RXE_TYPE_MR) { 83 mem->ibmr.lkey = lkey; 84 mem->ibmr.rkey = rkey; 85 } 86 87 mem->lkey = lkey; 88 mem->rkey = rkey; 89 mem->state = RXE_MEM_STATE_INVALID; 90 mem->type = RXE_MEM_TYPE_NONE; 91 mem->map_shift = ilog2(RXE_BUF_PER_MAP); 92 } 93 94 void rxe_mem_cleanup(struct rxe_pool_entry *arg) 95 { 96 struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem); 97 int i; 98 99 if (mem->umem) 100 ib_umem_release(mem->umem); 101 102 if (mem->map) { 103 for (i = 0; i < mem->num_map; i++) 104 kfree(mem->map[i]); 105 106 kfree(mem->map); 107 } 108 } 109 110 static int rxe_mem_alloc(struct rxe_mem *mem, int num_buf) 111 { 112 int i; 113 int num_map; 114 struct rxe_map **map = mem->map; 115 116 num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; 117 118 mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL); 119 if (!mem->map) 120 goto err1; 121 122 for (i = 0; i < num_map; i++) { 123 mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL); 124 if (!mem->map[i]) 125 goto err2; 126 } 127 128 BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP)); 129 130 mem->map_shift = ilog2(RXE_BUF_PER_MAP); 131 mem->map_mask = RXE_BUF_PER_MAP - 1; 132 133 mem->num_buf = num_buf; 134 mem->num_map = num_map; 135 mem->max_buf = num_map * RXE_BUF_PER_MAP; 136 137 return 0; 138 139 err2: 140 for (i--; i >= 0; i--) 141 kfree(mem->map[i]); 142 143 kfree(mem->map); 144 err1: 145 return -ENOMEM; 146 } 147 148 int rxe_mem_init_dma(struct rxe_pd *pd, 149 int access, struct rxe_mem *mem) 150 { 151 rxe_mem_init(access, mem); 152 153 mem->pd = pd; 154 mem->access = access; 155 mem->state = RXE_MEM_STATE_VALID; 156 mem->type = RXE_MEM_TYPE_DMA; 157 158 return 0; 159 } 160 161 int rxe_mem_init_user(struct rxe_pd *pd, u64 start, 162 u64 length, u64 iova, int access, struct ib_udata *udata, 163 struct rxe_mem *mem) 164 { 165 struct rxe_map **map; 166 struct rxe_phys_buf *buf = NULL; 167 struct ib_umem *umem; 168 struct sg_page_iter sg_iter; 169 int num_buf; 170 void *vaddr; 171 int err; 172 173 umem = ib_umem_get(udata, start, length, access, 0); 174 if (IS_ERR(umem)) { 175 pr_warn("err %d from rxe_umem_get\n", 176 (int)PTR_ERR(umem)); 177 err = -EINVAL; 178 goto err1; 179 } 180 181 mem->umem = umem; 182 num_buf = ib_umem_num_pages(umem); 183 184 rxe_mem_init(access, mem); 185 186 err = rxe_mem_alloc(mem, num_buf); 187 if (err) { 188 pr_warn("err %d from rxe_mem_alloc\n", err); 189 ib_umem_release(umem); 190 goto err1; 191 } 192 193 mem->page_shift = PAGE_SHIFT; 194 mem->page_mask = PAGE_SIZE - 1; 195 196 num_buf = 0; 197 map = mem->map; 198 if (length > 0) { 199 buf = map[0]->buf; 200 201 for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { 202 if (num_buf >= RXE_BUF_PER_MAP) { 203 map++; 204 buf = map[0]->buf; 205 num_buf = 0; 206 } 207 208 vaddr = page_address(sg_page_iter_page(&sg_iter)); 209 if (!vaddr) { 210 pr_warn("null vaddr\n"); 211 err = -ENOMEM; 212 goto err1; 213 } 214 215 buf->addr = (uintptr_t)vaddr; 216 buf->size = PAGE_SIZE; 217 num_buf++; 218 buf++; 219 220 } 221 } 222 223 mem->pd = pd; 224 mem->umem = umem; 225 mem->access = access; 226 mem->length = length; 227 mem->iova = iova; 228 mem->va = start; 229 mem->offset = ib_umem_offset(umem); 230 mem->state = RXE_MEM_STATE_VALID; 231 mem->type = RXE_MEM_TYPE_MR; 232 233 return 0; 234 235 err1: 236 return err; 237 } 238 239 int rxe_mem_init_fast(struct rxe_pd *pd, 240 int max_pages, struct rxe_mem *mem) 241 { 242 int err; 243 244 rxe_mem_init(0, mem); 245 246 /* In fastreg, we also set the rkey */ 247 mem->ibmr.rkey = mem->ibmr.lkey; 248 249 err = rxe_mem_alloc(mem, max_pages); 250 if (err) 251 goto err1; 252 253 mem->pd = pd; 254 mem->max_buf = max_pages; 255 mem->state = RXE_MEM_STATE_FREE; 256 mem->type = RXE_MEM_TYPE_MR; 257 258 return 0; 259 260 err1: 261 return err; 262 } 263 264 static void lookup_iova( 265 struct rxe_mem *mem, 266 u64 iova, 267 int *m_out, 268 int *n_out, 269 size_t *offset_out) 270 { 271 size_t offset = iova - mem->iova + mem->offset; 272 int map_index; 273 int buf_index; 274 u64 length; 275 276 if (likely(mem->page_shift)) { 277 *offset_out = offset & mem->page_mask; 278 offset >>= mem->page_shift; 279 *n_out = offset & mem->map_mask; 280 *m_out = offset >> mem->map_shift; 281 } else { 282 map_index = 0; 283 buf_index = 0; 284 285 length = mem->map[map_index]->buf[buf_index].size; 286 287 while (offset >= length) { 288 offset -= length; 289 buf_index++; 290 291 if (buf_index == RXE_BUF_PER_MAP) { 292 map_index++; 293 buf_index = 0; 294 } 295 length = mem->map[map_index]->buf[buf_index].size; 296 } 297 298 *m_out = map_index; 299 *n_out = buf_index; 300 *offset_out = offset; 301 } 302 } 303 304 void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length) 305 { 306 size_t offset; 307 int m, n; 308 void *addr; 309 310 if (mem->state != RXE_MEM_STATE_VALID) { 311 pr_warn("mem not in valid state\n"); 312 addr = NULL; 313 goto out; 314 } 315 316 if (!mem->map) { 317 addr = (void *)(uintptr_t)iova; 318 goto out; 319 } 320 321 if (mem_check_range(mem, iova, length)) { 322 pr_warn("range violation\n"); 323 addr = NULL; 324 goto out; 325 } 326 327 lookup_iova(mem, iova, &m, &n, &offset); 328 329 if (offset + length > mem->map[m]->buf[n].size) { 330 pr_warn("crosses page boundary\n"); 331 addr = NULL; 332 goto out; 333 } 334 335 addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset; 336 337 out: 338 return addr; 339 } 340 341 /* copy data from a range (vaddr, vaddr+length-1) to or from 342 * a mem object starting at iova. Compute incremental value of 343 * crc32 if crcp is not zero. caller must hold a reference to mem 344 */ 345 int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, 346 enum copy_direction dir, u32 *crcp) 347 { 348 int err; 349 int bytes; 350 u8 *va; 351 struct rxe_map **map; 352 struct rxe_phys_buf *buf; 353 int m; 354 int i; 355 size_t offset; 356 u32 crc = crcp ? (*crcp) : 0; 357 358 if (length == 0) 359 return 0; 360 361 if (mem->type == RXE_MEM_TYPE_DMA) { 362 u8 *src, *dest; 363 364 src = (dir == to_mem_obj) ? 365 addr : ((void *)(uintptr_t)iova); 366 367 dest = (dir == to_mem_obj) ? 368 ((void *)(uintptr_t)iova) : addr; 369 370 memcpy(dest, src, length); 371 372 if (crcp) 373 *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device), 374 *crcp, dest, length); 375 376 return 0; 377 } 378 379 WARN_ON_ONCE(!mem->map); 380 381 err = mem_check_range(mem, iova, length); 382 if (err) { 383 err = -EFAULT; 384 goto err1; 385 } 386 387 lookup_iova(mem, iova, &m, &i, &offset); 388 389 map = mem->map + m; 390 buf = map[0]->buf + i; 391 392 while (length > 0) { 393 u8 *src, *dest; 394 395 va = (u8 *)(uintptr_t)buf->addr + offset; 396 src = (dir == to_mem_obj) ? addr : va; 397 dest = (dir == to_mem_obj) ? va : addr; 398 399 bytes = buf->size - offset; 400 401 if (bytes > length) 402 bytes = length; 403 404 memcpy(dest, src, bytes); 405 406 if (crcp) 407 crc = rxe_crc32(to_rdev(mem->pd->ibpd.device), 408 crc, dest, bytes); 409 410 length -= bytes; 411 addr += bytes; 412 413 offset = 0; 414 buf++; 415 i++; 416 417 if (i == RXE_BUF_PER_MAP) { 418 i = 0; 419 map++; 420 buf = map[0]->buf; 421 } 422 } 423 424 if (crcp) 425 *crcp = crc; 426 427 return 0; 428 429 err1: 430 return err; 431 } 432 433 /* copy data in or out of a wqe, i.e. sg list 434 * under the control of a dma descriptor 435 */ 436 int copy_data( 437 struct rxe_pd *pd, 438 int access, 439 struct rxe_dma_info *dma, 440 void *addr, 441 int length, 442 enum copy_direction dir, 443 u32 *crcp) 444 { 445 int bytes; 446 struct rxe_sge *sge = &dma->sge[dma->cur_sge]; 447 int offset = dma->sge_offset; 448 int resid = dma->resid; 449 struct rxe_mem *mem = NULL; 450 u64 iova; 451 int err; 452 453 if (length == 0) 454 return 0; 455 456 if (length > resid) { 457 err = -EINVAL; 458 goto err2; 459 } 460 461 if (sge->length && (offset < sge->length)) { 462 mem = lookup_mem(pd, access, sge->lkey, lookup_local); 463 if (!mem) { 464 err = -EINVAL; 465 goto err1; 466 } 467 } 468 469 while (length > 0) { 470 bytes = length; 471 472 if (offset >= sge->length) { 473 if (mem) { 474 rxe_drop_ref(mem); 475 mem = NULL; 476 } 477 sge++; 478 dma->cur_sge++; 479 offset = 0; 480 481 if (dma->cur_sge >= dma->num_sge) { 482 err = -ENOSPC; 483 goto err2; 484 } 485 486 if (sge->length) { 487 mem = lookup_mem(pd, access, sge->lkey, 488 lookup_local); 489 if (!mem) { 490 err = -EINVAL; 491 goto err1; 492 } 493 } else { 494 continue; 495 } 496 } 497 498 if (bytes > sge->length - offset) 499 bytes = sge->length - offset; 500 501 if (bytes > 0) { 502 iova = sge->addr + offset; 503 504 err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp); 505 if (err) 506 goto err2; 507 508 offset += bytes; 509 resid -= bytes; 510 length -= bytes; 511 addr += bytes; 512 } 513 } 514 515 dma->sge_offset = offset; 516 dma->resid = resid; 517 518 if (mem) 519 rxe_drop_ref(mem); 520 521 return 0; 522 523 err2: 524 if (mem) 525 rxe_drop_ref(mem); 526 err1: 527 return err; 528 } 529 530 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length) 531 { 532 struct rxe_sge *sge = &dma->sge[dma->cur_sge]; 533 int offset = dma->sge_offset; 534 int resid = dma->resid; 535 536 while (length) { 537 unsigned int bytes; 538 539 if (offset >= sge->length) { 540 sge++; 541 dma->cur_sge++; 542 offset = 0; 543 if (dma->cur_sge >= dma->num_sge) 544 return -ENOSPC; 545 } 546 547 bytes = length; 548 549 if (bytes > sge->length - offset) 550 bytes = sge->length - offset; 551 552 offset += bytes; 553 resid -= bytes; 554 length -= bytes; 555 } 556 557 dma->sge_offset = offset; 558 dma->resid = resid; 559 560 return 0; 561 } 562 563 /* (1) find the mem (mr or mw) corresponding to lkey/rkey 564 * depending on lookup_type 565 * (2) verify that the (qp) pd matches the mem pd 566 * (3) verify that the mem can support the requested access 567 * (4) verify that mem state is valid 568 */ 569 struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, 570 enum lookup_type type) 571 { 572 struct rxe_mem *mem; 573 struct rxe_dev *rxe = to_rdev(pd->ibpd.device); 574 int index = key >> 8; 575 576 mem = rxe_pool_get_index(&rxe->mr_pool, index); 577 if (!mem) 578 return NULL; 579 580 if (unlikely((type == lookup_local && mem->lkey != key) || 581 (type == lookup_remote && mem->rkey != key) || 582 mem->pd != pd || 583 (access && !(access & mem->access)) || 584 mem->state != RXE_MEM_STATE_VALID)) { 585 rxe_drop_ref(mem); 586 mem = NULL; 587 } 588 589 return mem; 590 } 591 592 int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, 593 u64 *page, int num_pages, u64 iova) 594 { 595 int i; 596 int num_buf; 597 int err; 598 struct rxe_map **map; 599 struct rxe_phys_buf *buf; 600 int page_size; 601 602 if (num_pages > mem->max_buf) { 603 err = -EINVAL; 604 goto err1; 605 } 606 607 num_buf = 0; 608 page_size = 1 << mem->page_shift; 609 map = mem->map; 610 buf = map[0]->buf; 611 612 for (i = 0; i < num_pages; i++) { 613 buf->addr = *page++; 614 buf->size = page_size; 615 buf++; 616 num_buf++; 617 618 if (num_buf == RXE_BUF_PER_MAP) { 619 map++; 620 buf = map[0]->buf; 621 num_buf = 0; 622 } 623 } 624 625 mem->iova = iova; 626 mem->va = iova; 627 mem->length = num_pages << mem->page_shift; 628 mem->state = RXE_MEM_STATE_VALID; 629 630 return 0; 631 632 err1: 633 return err; 634 } 635