/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

static int rxe_query_device(struct ib_device *dev,
			    struct ib_device_attr *attr,
			    struct ib_udata *uhw)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	*attr = rxe->attr;
	return 0;
}

static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed,
				      u8 *active_width)
{
	if (speed <= 1000) {
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_SDR;
	} else if (speed <= 10000) {
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_FDR10;
	} else if (speed <= 20000) {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_DDR;
	} else if (speed <= 30000) {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_QDR;
	} else if (speed <= 40000) {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_FDR10;
	} else {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_EDR;
	}
}

static int rxe_query_port(struct ib_device *dev,
			  u8 port_num, struct ib_port_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;
	u32 speed;

	if (unlikely(port_num != 1)) {
		pr_warn("invalid port_number %d\n", port_num);
		goto err1;
	}

	port = &rxe->port;

	*attr = port->attr;

	mutex_lock(&rxe->usdev_lock);
	if (rxe->ndev->ethtool_ops->get_link_ksettings) {
		struct ethtool_link_ksettings ks;

		rxe->ndev->ethtool_ops->get_link_ksettings(rxe->ndev, &ks);
		speed = ks.base.speed;
	} else if (rxe->ndev->ethtool_ops->get_settings) {
		struct ethtool_cmd cmd;

		rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd);
		speed = cmd.speed;
	} else {
		pr_warn("%s speed is unknown, defaulting to 1000\n",
			rxe->ndev->name);
		speed = 1000;
	}
	rxe_eth_speed_to_ib_speed(speed, &attr->active_speed,
				  &attr->active_width);
	mutex_unlock(&rxe->usdev_lock);

	return 0;

err1:
	return -EINVAL;
}

static int rxe_query_gid(struct ib_device *device,
			 u8 port_num, int index, union ib_gid *gid)
{
	int ret;

	if (index > RXE_PORT_GID_TBL_LEN)
		return -EINVAL;

	ret = ib_get_cached_gid(device, port_num, index, gid, NULL);
	if (ret == -EAGAIN) {
		memcpy(gid, &zgid, sizeof(*gid));
		return 0;
	}

	return ret;
}

static int rxe_add_gid(struct ib_device *device, u8 port_num, unsigned int
		       index, const union ib_gid *gid,
		       const struct ib_gid_attr *attr, void **context)
{
	if (index >= RXE_PORT_GID_TBL_LEN)
		return -EINVAL;
	return 0;
}

static int rxe_del_gid(struct ib_device *device, u8 port_num, unsigned int
		       index, void **context)
{
	if (index >= RXE_PORT_GID_TBL_LEN)
		return -EINVAL;
	return 0;
}

static struct net_device *rxe_get_netdev(struct ib_device *device,
					 u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(device);

	if (rxe->ndev) {
		dev_hold(rxe->ndev);
		return rxe->ndev;
	}

	return NULL;
}

static int rxe_query_pkey(struct ib_device *device,
			  u8 port_num, u16 index, u16 *pkey)
{
	struct rxe_dev *rxe = to_rdev(device);
	struct rxe_port *port;

	if (unlikely(port_num != 1)) {
		dev_warn(device->dma_device, "invalid port_num = %d\n",
			 port_num);
		goto err1;
	}

	port = &rxe->port;

	if (unlikely(index >= port->attr.pkey_tbl_len)) {
		dev_warn(device->dma_device, "invalid index = %d\n",
			 index);
		goto err1;
	}

	*pkey = port->pkey_tbl[index];
	return 0;

err1:
	return -EINVAL;
}

static int rxe_modify_device(struct ib_device *dev,
			     int mask, struct ib_device_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(rxe->ib_dev.node_desc,
		       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
	}

	return 0;
}

static int rxe_modify_port(struct ib_device *dev,
			   u8 port_num, int mask, struct ib_port_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;

	if (unlikely(port_num != 1)) {
		pr_warn("invalid port_num = %d\n", port_num);
		goto err1;
	}

	port = &rxe->port;

	port->attr.port_cap_flags |= attr->set_port_cap_mask;
	port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

	if (mask & IB_PORT_RESET_QKEY_CNTR)
		port->attr.qkey_viol_cntr = 0;

	return 0;

err1:
	return -EINVAL;
}

static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
					       u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(dev);

	return rxe->ifc_ops->link_layer(rxe, port_num);
}

static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
					      struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_ucontext *uc;

	uc = rxe_alloc(&rxe->uc_pool);
	return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
	struct rxe_ucontext *uc = to_ruc(ibuc);

	rxe_drop_ref(uc);
	return 0;
}

static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
			      struct ib_port_immutable *immutable)
{
	int err;
	struct ib_port_attr attr;

	err = rxe_query_port(dev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}

static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
				  struct ib_ucontext *context,
				  struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_pd *pd;

	pd = rxe_alloc(&rxe->pd_pool);
	return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_pd(struct ib_pd *ibpd)
{
	struct rxe_pd *pd = to_rpd(ibpd);

	rxe_drop_ref(pd);
	return 0;
}

static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr,
		       struct rxe_av *av)
{
	int err;
	union ib_gid sgid;
	struct ib_gid_attr sgid_attr;

	err = ib_get_cached_gid(&rxe->ib_dev, attr->port_num,
				attr->grh.sgid_index, &sgid,
				&sgid_attr);
	if (err) {
		pr_err("Failed to query sgid. err = %d\n", err);
		return err;
	}

	err = rxe_av_from_attr(rxe, attr->port_num, av, attr);
	if (!err)
		err = rxe_av_fill_ip_info(rxe, av, attr, &sgid_attr, &sgid);

	if (sgid_attr.ndev)
		dev_put(sgid_attr.ndev);
	return err;
}

static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_ah *ah;

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		goto err1;

	ah = rxe_alloc(&rxe->ah_pool);
	if (!ah) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_ref(pd);
	ah->pd = pd;

	err = rxe_init_av(rxe, attr, &ah->av);
	if (err)
		goto err2;

	return &ah->ibah;

err2:
	rxe_drop_ref(pd);
	rxe_drop_ref(ah);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return err;

	err = rxe_init_av(rxe, attr, &ah->av);
	if (err)
		return err;

	return 0;
}

static int rxe_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	rxe_av_to_attr(rxe, &ah->av, attr);
	return 0;
}

static int rxe_destroy_ah(struct ib_ah *ibah)
{
	struct rxe_ah *ah = to_rah(ibah);

	rxe_drop_ref(ah->pd);
	rxe_drop_ref(ah);
	return 0;
}

static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr)
{
	int err;
	int i;
	u32 length;
	struct rxe_recv_wqe *recv_wqe;
	int num_sge = ibwr->num_sge;

	if (unlikely(queue_full(rq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	if (unlikely(num_sge > rq->max_sge)) {
		err = -EINVAL;
		goto err1;
	}

	length = 0;
	for (i = 0; i < num_sge; i++)
		length += ibwr->sg_list[i].length;

	recv_wqe = producer_addr(rq->queue);
	recv_wqe->wr_id = ibwr->wr_id;
	recv_wqe->num_sge = num_sge;

	memcpy(recv_wqe->dma.sge, ibwr->sg_list,
	       num_sge * sizeof(struct ib_sge));

	recv_wqe->dma.length = length;
	recv_wqe->dma.resid = length;
	recv_wqe->dma.num_sge = num_sge;
	recv_wqe->dma.cur_sge = 0;
	recv_wqe->dma.sge_offset = 0;

	/* make sure all changes to the work queue are written before we
	 * update the producer pointer
	 */
	smp_wmb();

	advance_producer(rq->queue);
	return 0;

err1:
	return err;
}

static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
				     struct ib_srq_init_attr *init,
				     struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_srq *srq;
	struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;

	err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
	if (err)
		goto err1;

	srq = rxe_alloc(&rxe->srq_pool);
	if (!srq) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(srq);
	rxe_add_ref(pd);
	srq->pd = pd;

	err = rxe_srq_from_init(rxe, srq, init, context, udata);
	if (err)
		goto err2;

	return &srq->ibsrq;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(srq);
	rxe_drop_ref(srq);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
			  enum ib_srq_attr_mask mask,
			  struct ib_udata *udata)
{
	int err;
	struct rxe_srq *srq = to_rsrq(ibsrq);
	struct rxe_dev *rxe = to_rdev(ibsrq->device);

	err = rxe_srq_chk_attr(rxe, srq, attr, mask);
	if (err)
		goto err1;

	err = rxe_srq_from_attr(rxe, srq, attr, mask, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->error)
		return -EINVAL;

	attr->max_wr = srq->rq.queue->buf->index_mask;
	attr->max_sge = srq->rq.max_sge;
	attr->srq_limit = srq->limit;
	return 0;
}

static int rxe_destroy_srq(struct ib_srq *ibsrq)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->rq.queue)
		rxe_queue_cleanup(srq->rq.queue);

	rxe_drop_ref(srq->pd);
	rxe_drop_index(srq);
	rxe_drop_ref(srq);

	return 0;
}

static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
			     struct ib_recv_wr **bad_wr)
{
	int err = 0;
	unsigned long flags;
	struct rxe_srq *srq = to_rsrq(ibsrq);

	spin_lock_irqsave(&srq->rq.producer_lock, flags);

	while (wr) {
		err = post_one_recv(&srq->rq, wr);
		if (unlikely(err))
			break;
		wr = wr->next;
	}

	spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

	if (err)
		*bad_wr = wr;

	return err;
}

static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
				   struct ib_qp_init_attr *init,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_qp *qp;

	err = rxe_qp_chk_init(rxe, init);
	if (err)
		goto err1;

	qp = rxe_alloc(&rxe->qp_pool);
	if (!qp) {
		err = -ENOMEM;
		goto err1;
	}

	if (udata) {
		if (udata->inlen) {
			err = -EINVAL;
			goto err2;
		}
		qp->is_user = 1;
	}

	rxe_add_index(qp);

	err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd);
	if (err)
		goto err3;

	return &qp->ibqp;

err3:
	rxe_drop_index(qp);
err2:
	rxe_drop_ref(qp);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			 int mask, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	err = rxe_qp_chk_attr(rxe, qp, attr, mask);
	if (err)
		goto err1;

	err = rxe_qp_from_attr(qp, attr, mask, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			int mask, struct ib_qp_init_attr *init)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_to_init(qp, init);
	rxe_qp_to_attr(qp, attr, mask);

	return 0;
}

static int rxe_destroy_qp(struct ib_qp *ibqp)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_destroy(qp);
	rxe_drop_index(qp);
	rxe_drop_ref(qp);
	return 0;
}

static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			    unsigned int mask, unsigned int length)
{
	int num_sge = ibwr->num_sge;
	struct rxe_sq *sq = &qp->sq;

	if (unlikely(num_sge > sq->max_sge))
		goto err1;

	if (unlikely(mask & WR_ATOMIC_MASK)) {
		if (length < 8)
			goto err1;

		if (atomic_wr(ibwr)->remote_addr & 0x7)
			goto err1;
	}

	if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
		     (length > sq->max_inline)))
		goto err1;

	return 0;

err1:
	return -EINVAL;
}

static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
			 struct ib_send_wr *ibwr)
{
	wr->wr_id = ibwr->wr_id;
	wr->num_sge = ibwr->num_sge;
	wr->opcode = ibwr->opcode;
	wr->send_flags = ibwr->send_flags;

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI) {
		wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
		wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
		if (qp_type(qp) == IB_QPT_GSI)
			wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
		if (wr->opcode == IB_WR_SEND_WITH_IMM)
			wr->ex.imm_data = ibwr->ex.imm_data;
	} else {
		switch (wr->opcode) {
		case IB_WR_RDMA_WRITE_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			/* fall through - also needs the RDMA address and rkey */
		case IB_WR_RDMA_READ:
		case IB_WR_RDMA_WRITE:
			wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
			wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
			break;
		case IB_WR_SEND_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			break;
		case IB_WR_SEND_WITH_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			wr->wr.atomic.remote_addr =
				atomic_wr(ibwr)->remote_addr;
			wr->wr.atomic.compare_add =
				atomic_wr(ibwr)->compare_add;
			wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
			wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
			break;
		case IB_WR_LOCAL_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_REG_MR:
			wr->wr.reg.mr = reg_wr(ibwr)->mr;
			wr->wr.reg.key = reg_wr(ibwr)->key;
			wr->wr.reg.access = reg_wr(ibwr)->access;
			break;
		default:
			break;
		}
	}
}

static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			 unsigned int mask, unsigned int length,
			 struct rxe_send_wqe *wqe)
{
	int num_sge = ibwr->num_sge;
	struct ib_sge *sge;
	int i;
	u8 *p;

	init_send_wr(qp, &wqe->wr, ibwr);

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI)
		memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));

	if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
		p = wqe->dma.inline_data;

		sge = ibwr->sg_list;
		for (i = 0; i < num_sge; i++, sge++) {
			if (qp->is_user && copy_from_user(p, (void __user *)
					   (uintptr_t)sge->addr, sge->length))
				return -EFAULT;

			else if (!qp->is_user)
				memcpy(p, (void *)(uintptr_t)sge->addr,
				       sge->length);

			p += sge->length;
		}
	} else if (mask & WR_REG_MASK) {
		wqe->mask = mask;
		wqe->state = wqe_state_posted;
		return 0;
	} else
		memcpy(wqe->dma.sge, ibwr->sg_list,
		       num_sge * sizeof(struct ib_sge));

	wqe->iova = (mask & WR_ATOMIC_MASK) ?
		atomic_wr(ibwr)->remote_addr :
		rdma_wr(ibwr)->remote_addr;
	wqe->mask = mask;
	wqe->dma.length = length;
	wqe->dma.resid = length;
	wqe->dma.num_sge = num_sge;
	wqe->dma.cur_sge = 0;
	wqe->dma.sge_offset = 0;
	wqe->state = wqe_state_posted;
	wqe->ssn = atomic_add_return(1, &qp->ssn);

	return 0;
}

static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			 unsigned int mask, u32 length)
{
	int err;
	struct rxe_sq *sq = &qp->sq;
	struct rxe_send_wqe *send_wqe;
	unsigned long flags;

	err = validate_send_wr(qp, ibwr, mask, length);
	if (err)
		return err;

	spin_lock_irqsave(&qp->sq.sq_lock, flags);

	if (unlikely(queue_full(sq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	send_wqe = producer_addr(sq->queue);

	err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
	if (unlikely(err))
		goto err1;

	/*
	 * make sure all changes to the work queue are
	 * written before we update the producer pointer
	 */
	smp_wmb();

	advance_producer(sq->queue);
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

	return 0;

err1:
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
	return err;
}

static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
				struct ib_send_wr **bad_wr)
{
	int err = 0;
	unsigned int mask;
	unsigned int length = 0;
	int i;
	int must_sched;

	while (wr) {
		mask = wr_opcode_mask(wr->opcode, qp);
		if (unlikely(!mask)) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
			     !(mask & WR_INLINE_MASK))) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		length = 0;
		for (i = 0; i < wr->num_sge; i++)
			length += wr->sg_list[i].length;

		err = post_one_send(qp, wr, mask, length);

		if (err) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	/*
	 * Must schedule in the GSI QP case because ib_send_mad() holds an irq
	 * lock and the requester calls ip_local_out_sk(), which takes
	 * spin_lock_bh.
	 */
	must_sched = (qp_type(qp) == IB_QPT_GSI) ||
			(queue_count(qp->sq.queue) > 1);

	rxe_run_task(&qp->req.task, must_sched);

	return err;
}

static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			 struct ib_send_wr **bad_wr)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	if (unlikely(!qp->valid)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (unlikely(qp->req.state < QP_STATE_READY)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (qp->is_user) {
		/* Utilize process context to do protocol processing */
		rxe_run_task(&qp->req.task, 0);
		return 0;
	} else
		return rxe_post_send_kernel(qp, wr, bad_wr);
}

static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			 struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_rq *rq = &qp->rq;
	unsigned long flags;

	if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	if (unlikely(qp->srq)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	spin_lock_irqsave(&rq->producer_lock, flags);

	while (wr) {
		err = post_one_recv(rq, wr);
		if (unlikely(err)) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	spin_unlock_irqrestore(&rq->producer_lock, flags);

err1:
	return err;
}

static struct ib_cq *rxe_create_cq(struct ib_device *dev,
				   const struct ib_cq_init_attr *attr,
				   struct ib_ucontext *context,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_cq *cq;

	if (attr->flags)
		return ERR_PTR(-EINVAL);

	err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector, udata);
	if (err)
		goto err1;

	cq = rxe_alloc(&rxe->cq_pool);
	if (!cq) {
		err = -ENOMEM;
		goto err1;
	}

	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
			       context, udata);
	if (err)
		goto err2;

	return &cq->ibcq;

err2:
	rxe_drop_ref(cq);
err1:
	return ERR_PTR(err);
}

static int rxe_destroy_cq(struct ib_cq *ibcq)
{
	struct rxe_cq *cq = to_rcq(ibcq);

	rxe_drop_ref(cq);
	return 0;
}

static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	int err;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_dev *rxe = to_rdev(ibcq->device);

	err = rxe_cq_chk_attr(rxe, cq, cqe, 0, udata);
	if (err)
		goto err1;

	err = rxe_cq_resize_queue(cq, cqe, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	int i;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_cqe *cqe;
	unsigned long flags;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (i = 0; i < num_entries; i++) {
		cqe = queue_head(cq->queue);
		if (!cqe)
			break;

		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
		advance_consumer(cq->queue);
	}
	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return i;
}

static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int count = queue_count(cq->queue);

	return (count > wc_cnt) ? wc_cnt : count;
}

static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	unsigned long irq_flags;
	int ret = 0;

	spin_lock_irqsave(&cq->cq_lock, irq_flags);
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = flags & IB_CQ_SOLICITED_MASK;

	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
		ret = 1;

	spin_unlock_irqrestore(&cq->cq_lock, irq_flags);

	return ret;
}

static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_dma(rxe, pd, access, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
				     u64 start,
				     u64 length,
				     u64 iova,
				     int access, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err2;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_user(rxe, pd, start, length, iova,
				access, udata, mr);
	if (err)
		goto err3;

	return &mr->ibmr;

err3:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err2:
	return ERR_PTR(err);
}

static int rxe_dereg_mr(struct ib_mr *ibmr)
{
	struct rxe_mem *mr = to_rmr(ibmr);

	mr->state = RXE_MEM_STATE_ZOMBIE;
	rxe_drop_ref(mr->pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
	return 0;
}

static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
				  enum ib_mr_type mr_type,
				  u32 max_num_sg)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_fast(rxe, pd, max_num_sg, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	struct rxe_map *map;
	struct rxe_phys_buf *buf;

	if (unlikely(mr->nbuf == mr->num_buf))
		return -ENOMEM;

	map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
	buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

	buf->addr = addr;
	buf->size = ibmr->page_size;
	mr->nbuf++;

	return 0;
}

static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			 int sg_nents, unsigned int *sg_offset)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	int n;

	mr->nbuf = 0;

	n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

	mr->va = ibmr->iova;
	mr->iova = ibmr->iova;
	mr->length = ibmr->length;
	mr->page_shift = ilog2(ibmr->page_size);
	mr->page_mask = ibmr->page_size - 1;
	mr->offset = mr->iova & mr->page_mask;

	return n;
}

static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mc_grp *grp;

	/* takes a ref on grp if successful */
	err = rxe_mcast_get_grp(rxe, mgid, &grp);
	if (err)
		return err;

	err = rxe_mcast_add_grp_elem(rxe, qp, grp);

	rxe_drop_ref(grp);
	return err;
}

static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}

static ssize_t rxe_show_parent(struct device *device,
			       struct device_attribute *attr, char *buf)
{
	struct rxe_dev *rxe = container_of(device, struct rxe_dev,
					   ib_dev.dev);
	char *name;

	name = rxe->ifc_ops->parent_name(rxe, 1);
	return snprintf(buf, 16, "%s\n", name);
}

static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL);

static struct device_attribute *rxe_dev_attributes[] = {
	&dev_attr_parent,
};

int rxe_register_device(struct rxe_dev *rxe)
{
	int err;
	int i;
	struct ib_device *dev = &rxe->ib_dev;

	strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
	strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

	dev->owner = THIS_MODULE;
	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
	dev->dma_device = rxe->ifc_ops->dma_device(rxe);
	dev->local_dma_lkey = 0;
	dev->node_guid = rxe->ifc_ops->node_guid(rxe);
	dev->dma_ops = &rxe_dma_mapping_ops;

	dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
	dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
	    | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
	    | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
	    ;

	dev->query_device = rxe_query_device;
	dev->modify_device = rxe_modify_device;
	dev->query_port = rxe_query_port;
	dev->modify_port = rxe_modify_port;
	dev->get_link_layer = rxe_get_link_layer;
	dev->query_gid = rxe_query_gid;
	dev->get_netdev = rxe_get_netdev;
	dev->add_gid = rxe_add_gid;
	dev->del_gid = rxe_del_gid;
	dev->query_pkey = rxe_query_pkey;
	dev->alloc_ucontext = rxe_alloc_ucontext;
	dev->dealloc_ucontext = rxe_dealloc_ucontext;
	dev->mmap = rxe_mmap;
	dev->get_port_immutable = rxe_port_immutable;
	dev->alloc_pd = rxe_alloc_pd;
	dev->dealloc_pd = rxe_dealloc_pd;
	dev->create_ah = rxe_create_ah;
	dev->modify_ah = rxe_modify_ah;
	dev->query_ah = rxe_query_ah;
	dev->destroy_ah = rxe_destroy_ah;
	dev->create_srq = rxe_create_srq;
	dev->modify_srq = rxe_modify_srq;
	dev->query_srq = rxe_query_srq;
	dev->destroy_srq = rxe_destroy_srq;
	dev->post_srq_recv = rxe_post_srq_recv;
	dev->create_qp = rxe_create_qp;
	dev->modify_qp = rxe_modify_qp;
	dev->query_qp = rxe_query_qp;
	dev->destroy_qp = rxe_destroy_qp;
	dev->post_send = rxe_post_send;
	dev->post_recv = rxe_post_recv;
	dev->create_cq = rxe_create_cq;
	dev->destroy_cq = rxe_destroy_cq;
	dev->resize_cq = rxe_resize_cq;
	dev->poll_cq = rxe_poll_cq;
	dev->peek_cq = rxe_peek_cq;
	dev->req_notify_cq = rxe_req_notify_cq;
	dev->get_dma_mr = rxe_get_dma_mr;
	dev->reg_user_mr = rxe_reg_user_mr;
	dev->dereg_mr = rxe_dereg_mr;
	dev->alloc_mr = rxe_alloc_mr;
	dev->map_mr_sg = rxe_map_mr_sg;
	dev->attach_mcast = rxe_attach_mcast;
	dev->detach_mcast = rxe_detach_mcast;

	err = ib_register_device(dev, NULL);
	if (err) {
		pr_warn("rxe_register_device failed, err = %d\n", err);
		goto err1;
	}

	for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
		err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
		if (err) {
			pr_warn("device_create_file failed, i = %d, err = %d\n",
				i, err);
			goto err2;
		}
	}

	return 0;

err2:
	ib_unregister_device(dev);
err1:
	return err;
}

int rxe_unregister_device(struct rxe_dev *rxe)
{
	int i;
	struct ib_device *dev = &rxe->ib_dev;

	for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
		device_remove_file(&dev->dev, rxe_dev_attributes[i]);

	ib_unregister_device(dev);

	return 0;
}