/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *	Redistribution and use in source and binary forms, with or
 *	without modification, are permitted provided that the following
 *	conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

static int rxe_query_device(struct ib_device *dev,
			    struct ib_device_attr *attr,
			    struct ib_udata *uhw)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	*attr = rxe->attr;
	return 0;
}

static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed,
				      u8 *active_width)
{
	if (speed <= 1000) {
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_SDR;
	} else if (speed <= 10000) {
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_FDR10;
	} else if (speed <= 20000) {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_DDR;
	} else if (speed <= 30000) {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_QDR;
	} else if (speed <= 40000) {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_FDR10;
	} else {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_EDR;
	}
}

static int rxe_query_port(struct ib_device *dev,
			  u8 port_num, struct ib_port_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;
	u32 speed;

	if (unlikely(port_num != 1)) {
		pr_warn("invalid port_number %d\n", port_num);
		goto err1;
	}

	port = &rxe->port;

	*attr = port->attr;

	mutex_lock(&rxe->usdev_lock);
	if (rxe->ndev->ethtool_ops->get_link_ksettings) {
		struct ethtool_link_ksettings ks;

		rxe->ndev->ethtool_ops->get_link_ksettings(rxe->ndev, &ks);
		speed = ks.base.speed;
	} else if (rxe->ndev->ethtool_ops->get_settings) {
		struct ethtool_cmd cmd;

		rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd);
		speed = cmd.speed;
	} else {
		pr_warn("%s speed is unknown, defaulting to 1000\n",
			rxe->ndev->name);
		speed = 1000;
	}
	rxe_eth_speed_to_ib_speed(speed, &attr->active_speed,
				  &attr->active_width);
	mutex_unlock(&rxe->usdev_lock);

	return 0;

err1:
	return -EINVAL;
}

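/* Return the GID at @index from the core GID cache; a cache miss
 * (-EAGAIN) is reported to the caller as the zero GID instead of an
 * error.
 */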
static int rxe_query_gid(struct ib_device *device,
			 u8 port_num, int index, union ib_gid *gid)
{
	int ret;

	if (index >= RXE_PORT_GID_TBL_LEN)
		return -EINVAL;

	ret = ib_get_cached_gid(device, port_num, index, gid, NULL);
	if (ret == -EAGAIN) {
		memcpy(gid, &zgid, sizeof(*gid));
		return 0;
	}

	return ret;
}

static int rxe_add_gid(struct ib_device *device, u8 port_num,
		       unsigned int index, const union ib_gid *gid,
		       const struct ib_gid_attr *attr, void **context)
{
	if (index >= RXE_PORT_GID_TBL_LEN)
		return -EINVAL;
	return 0;
}

static int rxe_del_gid(struct ib_device *device, u8 port_num,
		       unsigned int index, void **context)
{
	if (index >= RXE_PORT_GID_TBL_LEN)
		return -EINVAL;
	return 0;
}

static struct net_device *rxe_get_netdev(struct ib_device *device,
					 u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(device);

	if (rxe->ndev) {
		dev_hold(rxe->ndev);
		return rxe->ndev;
	}

	return NULL;
}

static int rxe_query_pkey(struct ib_device *device,
			  u8 port_num, u16 index, u16 *pkey)
{
	struct rxe_dev *rxe = to_rdev(device);
	struct rxe_port *port;

	if (unlikely(port_num != 1)) {
		dev_warn(device->dma_device, "invalid port_num = %d\n",
			 port_num);
		goto err1;
	}

	port = &rxe->port;

	if (unlikely(index >= port->attr.pkey_tbl_len)) {
		dev_warn(device->dma_device, "invalid index = %d\n",
			 index);
		goto err1;
	}

	*pkey = port->pkey_tbl[index];
	return 0;

err1:
	return -EINVAL;
}

static int rxe_modify_device(struct ib_device *dev,
			     int mask, struct ib_device_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(rxe->ib_dev.node_desc,
		       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
	}

	return 0;
}

static int rxe_modify_port(struct ib_device *dev,
			   u8 port_num, int mask, struct ib_port_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;

	if (unlikely(port_num != 1)) {
		pr_warn("invalid port_num = %d\n", port_num);
		goto err1;
	}

	port = &rxe->port;

	port->attr.port_cap_flags |= attr->set_port_cap_mask;
	port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

	if (mask & IB_PORT_RESET_QKEY_CNTR)
		port->attr.qkey_viol_cntr = 0;

	return 0;

err1:
	return -EINVAL;
}

static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
					       u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(dev);

	return rxe->ifc_ops->link_layer(rxe, port_num);
}

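/* User contexts and PDs are plain pool objects; their lifetime is
 * managed entirely through the pool reference counts.
 */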
static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
					      struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_ucontext *uc;

	uc = rxe_alloc(&rxe->uc_pool);
	return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
	struct rxe_ucontext *uc = to_ruc(ibuc);

	rxe_drop_ref(uc);
	return 0;
}

static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
			      struct ib_port_immutable *immutable)
{
	int err;
	struct ib_port_attr attr;

	err = rxe_query_port(dev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}

static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
				  struct ib_ucontext *context,
				  struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_pd *pd;

	pd = rxe_alloc(&rxe->pd_pool);
	return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_pd(struct ib_pd *ibpd)
{
	struct rxe_pd *pd = to_rpd(ibpd);

	rxe_drop_ref(pd);
	return 0;
}

static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr,
		       struct rxe_av *av)
{
	int err;
	union ib_gid sgid;
	struct ib_gid_attr sgid_attr;

	err = ib_get_cached_gid(&rxe->ib_dev, attr->port_num,
				attr->grh.sgid_index, &sgid,
				&sgid_attr);
	if (err) {
		pr_err("Failed to query sgid. err = %d\n", err);
		return err;
	}

	err = rxe_av_from_attr(rxe, attr->port_num, av, attr);
	if (!err)
		err = rxe_av_fill_ip_info(rxe, av, attr, &sgid_attr, &sgid);

	if (sgid_attr.ndev)
		dev_put(sgid_attr.ndev);
	return err;
}

static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_ah *ah;

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		goto err1;

	ah = rxe_alloc(&rxe->ah_pool);
	if (!ah) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_ref(pd);
	ah->pd = pd;

	err = rxe_init_av(rxe, attr, &ah->av);
	if (err)
		goto err2;

	return &ah->ibah;

err2:
	rxe_drop_ref(pd);
	rxe_drop_ref(ah);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return err;

	err = rxe_init_av(rxe, attr, &ah->av);
	if (err)
		return err;

	return 0;
}

static int rxe_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	rxe_av_to_attr(rxe, &ah->av, attr);
	return 0;
}

static int rxe_destroy_ah(struct ib_ah *ibah)
{
	struct rxe_ah *ah = to_rah(ibah);

	rxe_drop_ref(ah->pd);
	rxe_drop_ref(ah);
	return 0;
}

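/* Copy one receive work request into the next free slot of a receive
 * queue. Callers serialize on the queue's producer lock.
 */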
static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr)
{
	int err;
	int i;
	u32 length;
	struct rxe_recv_wqe *recv_wqe;
	int num_sge = ibwr->num_sge;

	if (unlikely(queue_full(rq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	if (unlikely(num_sge > rq->max_sge)) {
		err = -EINVAL;
		goto err1;
	}

	length = 0;
	for (i = 0; i < num_sge; i++)
		length += ibwr->sg_list[i].length;

	recv_wqe = producer_addr(rq->queue);
	recv_wqe->wr_id = ibwr->wr_id;
	recv_wqe->num_sge = num_sge;

	memcpy(recv_wqe->dma.sge, ibwr->sg_list,
	       num_sge * sizeof(struct ib_sge));

	recv_wqe->dma.length = length;
	recv_wqe->dma.resid = length;
	recv_wqe->dma.num_sge = num_sge;
	recv_wqe->dma.cur_sge = 0;
	recv_wqe->dma.sge_offset = 0;

	/* make sure all changes to the work queue are written before we
	 * update the producer pointer
	 */
	smp_wmb();

	advance_producer(rq->queue);
	return 0;

err1:
	return err;
}

static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
				     struct ib_srq_init_attr *init,
				     struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_srq *srq;
	struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;

	err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
	if (err)
		goto err1;

	srq = rxe_alloc(&rxe->srq_pool);
	if (!srq) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(srq);
	rxe_add_ref(pd);
	srq->pd = pd;

	err = rxe_srq_from_init(rxe, srq, init, context, udata);
	if (err)
		goto err2;

	return &srq->ibsrq;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(srq);
	rxe_drop_ref(srq);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
			  enum ib_srq_attr_mask mask,
			  struct ib_udata *udata)
{
	int err;
	struct rxe_srq *srq = to_rsrq(ibsrq);
	struct rxe_dev *rxe = to_rdev(ibsrq->device);

	err = rxe_srq_chk_attr(rxe, srq, attr, mask);
	if (err)
		goto err1;

	err = rxe_srq_from_attr(rxe, srq, attr, mask, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->error)
		return -EINVAL;

	attr->max_wr = srq->rq.queue->buf->index_mask;
	attr->max_sge = srq->rq.max_sge;
	attr->srq_limit = srq->limit;
	return 0;
}

static int rxe_destroy_srq(struct ib_srq *ibsrq)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->rq.queue)
		rxe_queue_cleanup(srq->rq.queue);

	rxe_drop_ref(srq->pd);
	rxe_drop_index(srq);
	rxe_drop_ref(srq);

	return 0;
}

static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
			     struct ib_recv_wr **bad_wr)
{
	int err = 0;
	unsigned long flags;
	struct rxe_srq *srq = to_rsrq(ibsrq);

	spin_lock_irqsave(&srq->rq.producer_lock, flags);

	while (wr) {
		err = post_one_recv(&srq->rq, wr);
		if (unlikely(err))
			break;
		wr = wr->next;
	}

	spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

	if (err)
		*bad_wr = wr;

	return err;
}

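/* Create a QP. Kernel consumers pass udata == NULL; userspace
 * consumers must not pass inline command data with this verb.
 */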
static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
				   struct ib_qp_init_attr *init,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_qp *qp;

	err = rxe_qp_chk_init(rxe, init);
	if (err)
		goto err1;

	qp = rxe_alloc(&rxe->qp_pool);
	if (!qp) {
		err = -ENOMEM;
		goto err1;
	}

	if (udata) {
		if (udata->inlen) {
			/* drop the freshly allocated QP so it is not leaked */
			rxe_drop_ref(qp);
			err = -EINVAL;
			goto err1;
		}
		qp->is_user = 1;
	}

	rxe_add_index(qp);

	err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd);
	if (err)
		goto err2;

	return &qp->ibqp;

err2:
	rxe_drop_index(qp);
	rxe_drop_ref(qp);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			 int mask, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	err = rxe_qp_chk_attr(rxe, qp, attr, mask);
	if (err)
		goto err1;

	err = rxe_qp_from_attr(qp, attr, mask, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			int mask, struct ib_qp_init_attr *init)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_to_init(qp, init);
	rxe_qp_to_attr(qp, attr, mask);

	return 0;
}

static int rxe_destroy_qp(struct ib_qp *ibqp)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_destroy(qp);
	rxe_drop_index(qp);
	rxe_drop_ref(qp);
	return 0;
}

static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			    unsigned int mask, unsigned int length)
{
	int num_sge = ibwr->num_sge;
	struct rxe_sq *sq = &qp->sq;

	if (unlikely(num_sge > sq->max_sge))
		goto err1;

	if (unlikely(mask & WR_ATOMIC_MASK)) {
		if (length < 8)
			goto err1;

		if (atomic_wr(ibwr)->remote_addr & 0x7)
			goto err1;
	}

	if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
		     (length > sq->max_inline)))
		goto err1;

	return 0;

err1:
	return -EINVAL;
}

static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
			 struct ib_send_wr *ibwr)
{
	wr->wr_id = ibwr->wr_id;
	wr->num_sge = ibwr->num_sge;
	wr->opcode = ibwr->opcode;
	wr->send_flags = ibwr->send_flags;

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI) {
		wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
		wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
		if (qp_type(qp) == IB_QPT_GSI)
			wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
		if (wr->opcode == IB_WR_SEND_WITH_IMM)
			wr->ex.imm_data = ibwr->ex.imm_data;
	} else {
		switch (wr->opcode) {
		case IB_WR_RDMA_WRITE_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			/* fall through - also needs the RDMA address and rkey */
		case IB_WR_RDMA_READ:
		case IB_WR_RDMA_WRITE:
			wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
			wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
			break;
		case IB_WR_SEND_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			break;
		case IB_WR_SEND_WITH_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			wr->wr.atomic.remote_addr =
				atomic_wr(ibwr)->remote_addr;
			wr->wr.atomic.compare_add =
				atomic_wr(ibwr)->compare_add;
			wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
			wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
			break;
		case IB_WR_LOCAL_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_REG_MR:
			wr->wr.reg.mr = reg_wr(ibwr)->mr;
			wr->wr.reg.key = reg_wr(ibwr)->key;
			wr->wr.reg.access = reg_wr(ibwr)->access;
			break;
		default:
			break;
		}
	}
}

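/* Build a send WQE from a work request. Inline data is copied into
 * the WQE itself; otherwise only the SGE list is copied and the
 * payload is fetched later by the requester.
 */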
static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			 unsigned int mask, unsigned int length,
			 struct rxe_send_wqe *wqe)
{
	int num_sge = ibwr->num_sge;
	struct ib_sge *sge;
	int i;
	u8 *p;

	init_send_wr(qp, &wqe->wr, ibwr);

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI)
		memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));

	if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
		p = wqe->dma.inline_data;

		sge = ibwr->sg_list;
		for (i = 0; i < num_sge; i++, sge++) {
			if (qp->is_user && copy_from_user(p, (void __user *)
					(uintptr_t)sge->addr, sge->length))
				return -EFAULT;
			else if (!qp->is_user)
				memcpy(p, (void *)(uintptr_t)sge->addr,
				       sge->length);

			p += sge->length;
		}
	} else if (mask & WR_REG_MASK) {
		wqe->mask = mask;
		wqe->state = wqe_state_posted;
		return 0;
	} else {
		memcpy(wqe->dma.sge, ibwr->sg_list,
		       num_sge * sizeof(struct ib_sge));
	}

	wqe->iova = (mask & WR_ATOMIC_MASK) ?
		atomic_wr(ibwr)->remote_addr :
		rdma_wr(ibwr)->remote_addr;
	wqe->mask = mask;
	wqe->dma.length = length;
	wqe->dma.resid = length;
	wqe->dma.num_sge = num_sge;
	wqe->dma.cur_sge = 0;
	wqe->dma.sge_offset = 0;
	wqe->state = wqe_state_posted;
	wqe->ssn = atomic_add_return(1, &qp->ssn);

	return 0;
}

static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			 unsigned int mask, u32 length)
{
	int err;
	struct rxe_sq *sq = &qp->sq;
	struct rxe_send_wqe *send_wqe;
	unsigned long flags;

	err = validate_send_wr(qp, ibwr, mask, length);
	if (err)
		return err;

	spin_lock_irqsave(&qp->sq.sq_lock, flags);

	if (unlikely(queue_full(sq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	send_wqe = producer_addr(sq->queue);

	err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
	if (unlikely(err))
		goto err1;

	/*
	 * make sure all changes to the work queue are
	 * written before we update the producer pointer
	 */
	smp_wmb();

	advance_producer(sq->queue);
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

	return 0;

err1:
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
	return err;
}

static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			 struct ib_send_wr **bad_wr)
{
	int err = 0;
	struct rxe_qp *qp = to_rqp(ibqp);
	unsigned int mask;
	unsigned int length = 0;
	int i;
	int must_sched;

	if (unlikely(!qp->valid)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (unlikely(qp->req.state < QP_STATE_READY)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	while (wr) {
		mask = wr_opcode_mask(wr->opcode, qp);
		if (unlikely(!mask)) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
			     !(mask & WR_INLINE_MASK))) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		length = 0;
		for (i = 0; i < wr->num_sge; i++)
			length += wr->sg_list[i].length;

		err = post_one_send(qp, wr, mask, length);

		if (err) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	/*
	 * Scheduling is mandatory for a GSI QP because ib_send_mad() holds
	 * an irq lock while the requester calls ip_local_out_sk(), which
	 * takes spin_lock_bh().
	 */
	must_sched = (qp_type(qp) == IB_QPT_GSI) ||
			(queue_count(qp->sq.queue) > 1);

	rxe_run_task(&qp->req.task, must_sched);

	return err;
}

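/* Post receive work requests to the QP's own receive queue. QPs that
 * were created with an SRQ must post receives through the SRQ instead.
 */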
static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			 struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_rq *rq = &qp->rq;
	unsigned long flags;

	if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	if (unlikely(qp->srq)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	spin_lock_irqsave(&rq->producer_lock, flags);

	while (wr) {
		err = post_one_recv(rq, wr);
		if (unlikely(err)) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	spin_unlock_irqrestore(&rq->producer_lock, flags);

err1:
	return err;
}

static struct ib_cq *rxe_create_cq(struct ib_device *dev,
				   const struct ib_cq_init_attr *attr,
				   struct ib_ucontext *context,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_cq *cq;

	if (attr->flags)
		return ERR_PTR(-EINVAL);

	err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector, udata);
	if (err)
		goto err1;

	cq = rxe_alloc(&rxe->cq_pool);
	if (!cq) {
		err = -ENOMEM;
		goto err1;
	}

	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
			       context, udata);
	if (err)
		goto err2;

	return &cq->ibcq;

err2:
	rxe_drop_ref(cq);
err1:
	return ERR_PTR(err);
}

static int rxe_destroy_cq(struct ib_cq *ibcq)
{
	struct rxe_cq *cq = to_rcq(ibcq);

	rxe_drop_ref(cq);
	return 0;
}

static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	int err;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_dev *rxe = to_rdev(ibcq->device);

	err = rxe_cq_chk_attr(rxe, cq, cqe, 0, udata);
	if (err)
		goto err1;

	err = rxe_cq_resize_queue(cq, cqe, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	int i;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_cqe *cqe;
	unsigned long flags;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (i = 0; i < num_entries; i++) {
		cqe = queue_head(cq->queue);
		if (!cqe)
			break;

		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
		advance_consumer(cq->queue);
	}
	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return i;
}

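/* Report how many completions are currently queued without consuming
 * them, capped at the caller's limit.
 */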
static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int count = queue_count(cq->queue);

	return (count > wc_cnt) ? wc_cnt : count;
}

static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct rxe_cq *cq = to_rcq(ibcq);

	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = flags & IB_CQ_SOLICITED_MASK;

	return 0;
}

static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_dma(rxe, pd, access, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
				     u64 start,
				     u64 length,
				     u64 iova,
				     int access, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err2;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_user(rxe, pd, start, length, iova,
				access, udata, mr);
	if (err)
		goto err3;

	return &mr->ibmr;

err3:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err2:
	return ERR_PTR(err);
}

static int rxe_dereg_mr(struct ib_mr *ibmr)
{
	struct rxe_mem *mr = to_rmr(ibmr);

	mr->state = RXE_MEM_STATE_ZOMBIE;
	rxe_drop_ref(mr->pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
	return 0;
}

static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
				  enum ib_mr_type mr_type,
				  u32 max_num_sg)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_fast(rxe, pd, max_num_sg, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	struct rxe_map *map;
	struct rxe_phys_buf *buf;

	if (unlikely(mr->nbuf == mr->num_buf))
		return -ENOMEM;

	map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
	buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

	buf->addr = addr;
	buf->size = ibmr->page_size;
	mr->nbuf++;

	return 0;
}

static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			 int sg_nents, unsigned int *sg_offset)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	int n;

	mr->nbuf = 0;

	n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

	mr->va = ibmr->iova;
	mr->iova = ibmr->iova;
	mr->length = ibmr->length;
	mr->page_shift = ilog2(ibmr->page_size);
	mr->page_mask = ibmr->page_size - 1;
	mr->offset = mr->iova & mr->page_mask;

	return n;
}

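/* Multicast attach: rxe_mcast_get_grp() takes a reference on the
 * group, which is dropped again once the QP has been added to it.
 */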
static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mc_grp *grp;

	/* takes a ref on grp if successful */
	err = rxe_mcast_get_grp(rxe, mgid, &grp);
	if (err)
		return err;

	err = rxe_mcast_add_grp_elem(rxe, qp, grp);

	rxe_drop_ref(grp);
	return err;
}

static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}

static ssize_t rxe_show_parent(struct device *device,
			       struct device_attribute *attr, char *buf)
{
	struct rxe_dev *rxe = container_of(device, struct rxe_dev,
					   ib_dev.dev);
	char *name;

	name = rxe->ifc_ops->parent_name(rxe, 1);
	return snprintf(buf, 16, "%s\n", name);
}

static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL);

static struct device_attribute *rxe_dev_attributes[] = {
	&dev_attr_parent,
};

int rxe_register_device(struct rxe_dev *rxe)
{
	int err;
	int i;
	struct ib_device *dev = &rxe->ib_dev;

	strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
	strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

	dev->owner = THIS_MODULE;
	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
	dev->dma_device = rxe->ifc_ops->dma_device(rxe);
	dev->local_dma_lkey = 0;
	dev->node_guid = rxe->ifc_ops->node_guid(rxe);
	dev->dma_ops = &rxe_dma_mapping_ops;

	dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
	dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
	    | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
	    | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
	    ;

	dev->query_device = rxe_query_device;
	dev->modify_device = rxe_modify_device;
	dev->query_port = rxe_query_port;
	dev->modify_port = rxe_modify_port;
	dev->get_link_layer = rxe_get_link_layer;
	dev->query_gid = rxe_query_gid;
	dev->get_netdev = rxe_get_netdev;
	dev->add_gid = rxe_add_gid;
	dev->del_gid = rxe_del_gid;
	dev->query_pkey = rxe_query_pkey;
	dev->alloc_ucontext = rxe_alloc_ucontext;
	dev->dealloc_ucontext = rxe_dealloc_ucontext;
	dev->mmap = rxe_mmap;
	dev->get_port_immutable = rxe_port_immutable;
	dev->alloc_pd = rxe_alloc_pd;
	dev->dealloc_pd = rxe_dealloc_pd;
	dev->create_ah = rxe_create_ah;
	dev->modify_ah = rxe_modify_ah;
	dev->query_ah = rxe_query_ah;
	dev->destroy_ah = rxe_destroy_ah;
	dev->create_srq = rxe_create_srq;
	dev->modify_srq = rxe_modify_srq;
	dev->query_srq = rxe_query_srq;
	dev->destroy_srq = rxe_destroy_srq;
	dev->post_srq_recv = rxe_post_srq_recv;
	dev->create_qp = rxe_create_qp;
	dev->modify_qp = rxe_modify_qp;
	dev->query_qp = rxe_query_qp;
	dev->destroy_qp = rxe_destroy_qp;
	dev->post_send = rxe_post_send;
	dev->post_recv = rxe_post_recv;
	dev->create_cq = rxe_create_cq;
	dev->destroy_cq = rxe_destroy_cq;
	dev->resize_cq = rxe_resize_cq;
	dev->poll_cq = rxe_poll_cq;
	dev->peek_cq = rxe_peek_cq;
	dev->req_notify_cq = rxe_req_notify_cq;
	dev->get_dma_mr = rxe_get_dma_mr;
	dev->reg_user_mr = rxe_reg_user_mr;
	dev->dereg_mr = rxe_dereg_mr;
	dev->alloc_mr = rxe_alloc_mr;
	dev->map_mr_sg = rxe_map_mr_sg;
	dev->attach_mcast = rxe_attach_mcast;
	dev->detach_mcast = rxe_detach_mcast;

	err = ib_register_device(dev, NULL);
	if (err) {
		pr_warn("rxe_register_device failed, err = %d\n", err);
		goto err1;
	}

	for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
		err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
		if (err) {
			pr_warn("device_create_file failed, i = %d, err = %d\n",
				i, err);
			goto err2;
		}
	}

	return 0;

err2:
	ib_unregister_device(dev);
err1:
	return err;
}

int rxe_unregister_device(struct rxe_dev *rxe)
{
	int i;
	struct ib_device *dev = &rxe->ib_dev;

	for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
		device_remove_file(&dev->dev, rxe_dev_attributes[i]);

	ib_unregister_device(dev);

	return 0;
}