/*
 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */
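
/*
 * VMware paravirtual RDMA (vmw_pvrdma) driver: device probe and teardown,
 * interrupt and asynchronous event handling, GID table maintenance, and
 * registration with the RDMA core.
 */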

#include <linux/errno.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <net/addrconf.h>

#include "pvrdma.h"

#define DRV_NAME	"vmw_pvrdma"
#define DRV_VERSION	"1.0.1.0-k"

static DEFINE_MUTEX(pvrdma_device_list_lock);
static LIST_HEAD(pvrdma_device_list);
static struct workqueue_struct *event_wq;

static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context);
static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);

static ssize_t hca_type_show(struct device *device,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
}
static DEVICE_ATTR_RO(hca_type);

static ssize_t hw_rev_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", PVRDMA_REV_ID);
}
static DEVICE_ATTR_RO(hw_rev);

static ssize_t board_id_show(struct device *device,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", PVRDMA_BOARD_ID);
}
static DEVICE_ATTR_RO(board_id);

static struct attribute *pvrdma_class_attributes[] = {
	&dev_attr_hw_rev.attr,
	&dev_attr_hca_type.attr,
	&dev_attr_board_id.attr,
	NULL,
};

static const struct attribute_group pvrdma_attr_group = {
	.attrs = pvrdma_class_attributes,
};

static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str)
{
	struct pvrdma_dev *dev =
		container_of(device, struct pvrdma_dev, ib_dev);
	snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d\n",
		 (int) (dev->dsr->caps.fw_ver >> 32),
		 (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff,
		 (int) dev->dsr->caps.fw_ver & 0xffff);
}

static int pvrdma_init_device(struct pvrdma_dev *dev)
{
	/* Initialize some device related stuff */
	spin_lock_init(&dev->cmd_lock);
	sema_init(&dev->cmd_sema, 1);
	atomic_set(&dev->num_qps, 0);
	atomic_set(&dev->num_srqs, 0);
	atomic_set(&dev->num_cqs, 0);
	atomic_set(&dev->num_pds, 0);
	atomic_set(&dev->num_ahs, 0);

	return 0;
}

static int pvrdma_port_immutable(struct ib_device *ibdev, u32 port_num,
				 struct ib_port_immutable *immutable)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);
	struct ib_port_attr attr;
	int err;

	if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1)
		immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE;
	else if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V2)
		immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
	return 0;
}

static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
				  enum ib_event_type event)
{
	struct ib_event ib_event;

	memset(&ib_event, 0, sizeof(ib_event));
	ib_event.device = &dev->ib_dev;
	ib_event.element.port_num = port;
	ib_event.event = event;
	ib_dispatch_event(&ib_event);
}

static void pvrdma_report_event_handle(struct ib_device *ibdev,
				       struct net_device *ndev,
				       unsigned long event)
{
	struct pvrdma_dev *dev = container_of(ibdev, struct pvrdma_dev, ib_dev);
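
	/*
	 * Translate link events on the bound net_device into IB port events:
	 * NETDEV_DOWN becomes IB_EVENT_PORT_ERR, while NETDEV_UP unquiesces
	 * the device before reporting IB_EVENT_PORT_ACTIVE.
	 */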

	switch (event) {
	case NETDEV_DOWN:
		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
		break;
	case NETDEV_UP:
		pvrdma_write_reg(dev, PVRDMA_REG_CTL,
				 PVRDMA_DEVICE_CTL_UNQUIESCE);

		mb();

		if (pvrdma_read_reg(dev, PVRDMA_REG_ERR))
			dev_err(&dev->pdev->dev,
				"failed to activate device during link up\n");
		else
			pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
		break;

	default:
		break;
	}
}

static const struct ib_device_ops pvrdma_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_VMW_PVRDMA,
	.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION,

	.add_gid = pvrdma_add_gid,
	.alloc_mr = pvrdma_alloc_mr,
	.alloc_pd = pvrdma_alloc_pd,
	.alloc_ucontext = pvrdma_alloc_ucontext,
	.create_ah = pvrdma_create_ah,
	.create_cq = pvrdma_create_cq,
	.create_qp = pvrdma_create_qp,
	.dealloc_pd = pvrdma_dealloc_pd,
	.dealloc_ucontext = pvrdma_dealloc_ucontext,
	.del_gid = pvrdma_del_gid,
	.dereg_mr = pvrdma_dereg_mr,
	.destroy_ah = pvrdma_destroy_ah,
	.destroy_cq = pvrdma_destroy_cq,
	.destroy_qp = pvrdma_destroy_qp,
	.device_group = &pvrdma_attr_group,
	.get_dev_fw_str = pvrdma_get_fw_ver_str,
	.get_dma_mr = pvrdma_get_dma_mr,
	.get_link_layer = pvrdma_port_link_layer,
	.get_port_immutable = pvrdma_port_immutable,
	.map_mr_sg = pvrdma_map_mr_sg,
	.mmap = pvrdma_mmap,
	.modify_port = pvrdma_modify_port,
	.modify_qp = pvrdma_modify_qp,
	.poll_cq = pvrdma_poll_cq,
	.post_recv = pvrdma_post_recv,
	.post_send = pvrdma_post_send,
	.query_device = pvrdma_query_device,
	.query_gid = pvrdma_query_gid,
	.query_pkey = pvrdma_query_pkey,
	.query_port = pvrdma_query_port,
	.query_qp = pvrdma_query_qp,
	.reg_user_mr = pvrdma_reg_user_mr,
	.req_notify_cq = pvrdma_req_notify_cq,
	.report_port_event = pvrdma_report_event_handle,

	INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah),
	INIT_RDMA_OBJ_SIZE(ib_cq, pvrdma_cq, ibcq),
	INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd),
	INIT_RDMA_OBJ_SIZE(ib_qp, pvrdma_qp, ibqp),
	INIT_RDMA_OBJ_SIZE(ib_ucontext, pvrdma_ucontext, ibucontext),
};

static const struct ib_device_ops pvrdma_dev_srq_ops = {
	.create_srq = pvrdma_create_srq,
	.destroy_srq = pvrdma_destroy_srq,
	.modify_srq = pvrdma_modify_srq,
	.query_srq = pvrdma_query_srq,

	INIT_RDMA_OBJ_SIZE(ib_srq, pvrdma_srq, ibsrq),
};

static int pvrdma_register_device(struct pvrdma_dev *dev)
{
	int ret = -1;

	dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
	dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
	dev->flags = 0;
	dev->ib_dev.num_comp_vectors = 1;
	dev->ib_dev.dev.parent = &dev->pdev->dev;

	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
	dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;

	ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_ops);

	mutex_init(&dev->port_mutex);
	spin_lock_init(&dev->desc_lock);

	dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(struct pvrdma_cq *),
			      GFP_KERNEL);
	if (!dev->cq_tbl)
		return ret;
	spin_lock_init(&dev->cq_tbl_lock);

	dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(struct pvrdma_qp *),
			      GFP_KERNEL);
	if (!dev->qp_tbl)
		goto err_cq_free;
	spin_lock_init(&dev->qp_tbl_lock);

	/* Check if SRQ is supported by backend */
	if (dev->dsr->caps.max_srq) {
		ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops);

		dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq,
				       sizeof(struct pvrdma_srq *),
				       GFP_KERNEL);
		if (!dev->srq_tbl)
			goto err_qp_free;
	}
	ret = ib_device_set_netdev(&dev->ib_dev, dev->netdev, 1);
	if (ret)
		goto err_srq_free;
	spin_lock_init(&dev->srq_tbl_lock);

	ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", &dev->pdev->dev);
	if (ret)
		goto err_srq_free;

	dev->ib_active = true;

	return 0;

err_srq_free:
	kfree(dev->srq_tbl);
err_qp_free:
	kfree(dev->qp_tbl);
err_cq_free:
	kfree(dev->cq_tbl);

	return ret;
}

static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id)
{
	u32 icr = PVRDMA_INTR_CAUSE_RESPONSE;
	struct pvrdma_dev *dev = dev_id;

	dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");

	if (!dev->pdev->msix_enabled) {
		/* Legacy intr */
		icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
		if (icr == 0)
			return IRQ_NONE;
	}

	if (icr == PVRDMA_INTR_CAUSE_RESPONSE)
		complete(&dev->cmd_done);

	return IRQ_HANDLED;
}

static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
{
	struct pvrdma_qp *qp;
	unsigned long flags;

	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
	qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp];
	if (qp)
		refcount_inc(&qp->refcnt);
	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

	if (qp && qp->ibqp.event_handler) {
		struct ib_qp *ibqp = &qp->ibqp;
		struct ib_event e;

		e.device = ibqp->device;
		e.element.qp = ibqp;
		e.event = type; /* 1:1 mapping for now. */
		ibqp->event_handler(&e, ibqp->qp_context);
	}
	if (qp) {
		if (refcount_dec_and_test(&qp->refcnt))
			complete(&qp->free);
	}
}

static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
{
	struct pvrdma_cq *cq;
	unsigned long flags;

	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq];
	if (cq)
		refcount_inc(&cq->refcnt);
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	if (cq && cq->ibcq.event_handler) {
		struct ib_cq *ibcq = &cq->ibcq;
		struct ib_event e;

		e.device = ibcq->device;
		e.element.cq = ibcq;
		e.event = type; /* 1:1 mapping for now. */
		ibcq->event_handler(&e, ibcq->cq_context);
	}
	if (cq) {
		if (refcount_dec_and_test(&cq->refcnt))
			complete(&cq->free);
	}
}

static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type)
{
	struct pvrdma_srq *srq;
	unsigned long flags;

	spin_lock_irqsave(&dev->srq_tbl_lock, flags);
	if (dev->srq_tbl)
		srq = dev->srq_tbl[srqn % dev->dsr->caps.max_srq];
	else
		srq = NULL;
	if (srq)
		refcount_inc(&srq->refcnt);
	spin_unlock_irqrestore(&dev->srq_tbl_lock, flags);

	if (srq && srq->ibsrq.event_handler) {
		struct ib_srq *ibsrq = &srq->ibsrq;
		struct ib_event e;

		e.device = ibsrq->device;
		e.element.srq = ibsrq;
		e.event = type; /* 1:1 mapping for now. */
		ibsrq->event_handler(&e, ibsrq->srq_context);
	}
	if (srq) {
		if (refcount_dec_and_test(&srq->refcnt))
			complete(&srq->free);
	}
}

static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type)
{
	if (port < 1 || port > dev->dsr->caps.phys_port_cnt) {
		dev_warn(&dev->pdev->dev, "event on port %d\n", port);
		return;
	}

	pvrdma_dispatch_event(dev, port, type);
}

static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i)
{
	return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr(
					&dev->async_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_eqe) * i);
}

static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->async_ring_state->rx;
	int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) *
			 PAGE_SIZE / sizeof(struct pvrdma_eqe);
	unsigned int head;

	dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n");

	/*
	 * Don't process events until the IB device is registered. Otherwise
	 * we'll try to ib_dispatch_event() on an invalid device.
	 */
	if (!dev->ib_active)
		return IRQ_HANDLED;

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_eqe *eqe;

		eqe = get_eqe(dev, head);

		switch (eqe->type) {
		case PVRDMA_EVENT_QP_FATAL:
		case PVRDMA_EVENT_QP_REQ_ERR:
		case PVRDMA_EVENT_QP_ACCESS_ERR:
		case PVRDMA_EVENT_COMM_EST:
		case PVRDMA_EVENT_SQ_DRAINED:
		case PVRDMA_EVENT_PATH_MIG:
		case PVRDMA_EVENT_PATH_MIG_ERR:
		case PVRDMA_EVENT_QP_LAST_WQE_REACHED:
			pvrdma_qp_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_CQ_ERR:
			pvrdma_cq_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_SRQ_ERR:
		case PVRDMA_EVENT_SRQ_LIMIT_REACHED:
			pvrdma_srq_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_PORT_ACTIVE:
		case PVRDMA_EVENT_PORT_ERR:
		case PVRDMA_EVENT_LID_CHANGE:
		case PVRDMA_EVENT_PKEY_CHANGE:
		case PVRDMA_EVENT_SM_CHANGE:
		case PVRDMA_EVENT_CLIENT_REREGISTER:
		case PVRDMA_EVENT_GID_CHANGE:
			pvrdma_dev_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_DEVICE_FATAL:
			pvrdma_dev_event(dev, 1, eqe->type);
			break;

		default:
			break;
		}

		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}

static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev,
					   unsigned int i)
{
	return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr(
					&dev->cq_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_cqne) * i);
}

static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->cq_ring_state->rx;
	int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE /
			 sizeof(struct pvrdma_cqne);
	unsigned int head;

	dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n");

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_cqne *cqne;
		struct pvrdma_cq *cq;

		cqne = get_cqne(dev, head);
		spin_lock(&dev->cq_tbl_lock);
		cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
		if (cq)
			refcount_inc(&cq->refcnt);
		spin_unlock(&dev->cq_tbl_lock);

		if (cq && cq->ibcq.comp_handler)
			cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
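		/*
		 * Drop the reference taken under cq_tbl_lock above; the
		 * final put completes &cq->free.
		 */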
		if (cq) {
			if (refcount_dec_and_test(&cq->refcnt))
				complete(&cq->free);
		}
		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}

static void pvrdma_free_irq(struct pvrdma_dev *dev)
{
	int i;

	dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
	for (i = 0; i < dev->nr_vectors; i++)
		free_irq(pci_irq_vector(dev->pdev, i), dev);
}

static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "enable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0);
}

static void pvrdma_disable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "disable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
}

static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;
	int ret = 0, i;

	ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS,
				    PCI_IRQ_MSIX);
	if (ret < 0) {
		ret = pci_alloc_irq_vectors(pdev, 1, 1,
					    PCI_IRQ_MSI | PCI_IRQ_INTX);
		if (ret < 0)
			return ret;
	}
	dev->nr_vectors = ret;

	ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler,
			  pdev->msix_enabled ? 0 : IRQF_SHARED, DRV_NAME, dev);
	if (ret) {
		dev_err(&dev->pdev->dev,
			"failed to request interrupt 0\n");
		goto out_free_vectors;
	}

	for (i = 1; i < dev->nr_vectors; i++) {
		ret = request_irq(pci_irq_vector(dev->pdev, i),
				  i == 1 ? pvrdma_intr1_handler :
					   pvrdma_intrx_handler,
				  0, DRV_NAME, dev);
		if (ret) {
			dev_err(&dev->pdev->dev,
				"failed to request interrupt %d\n", i);
			goto free_irqs;
		}
	}

	return 0;

free_irqs:
	while (--i >= 0)
		free_irq(pci_irq_vector(dev->pdev, i), dev);
out_free_vectors:
	pci_free_irq_vectors(pdev);
	return ret;
}

static void pvrdma_free_slots(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;

	if (dev->resp_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot,
				  dev->dsr->resp_slot_dma);
	if (dev->cmd_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot,
				  dev->dsr->cmd_slot_dma);
}

static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
				   const union ib_gid *gid,
				   u8 gid_type,
				   int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind;

	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_bind, 0, sizeof(*cmd_bind));
	cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND;
	memcpy(cmd_bind->new_gid, gid->raw, 16);
	cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024);
	cmd_bind->vlan = 0xfff;
	cmd_bind->index = index;
	cmd_bind->gid_type = gid_type;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not create binding, error: %d\n", ret);
		return -EFAULT;
	}
	memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid));
	return 0;
}

static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context)
{
	struct pvrdma_dev *dev = to_vdev(attr->device);

	return pvrdma_add_gid_at_index(dev, &attr->gid,
				       ib_gid_type_to_pvrdma(attr->gid_type),
				       attr->index);
}

static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind;
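
	/*
	 * Posting PVRDMA_CMD_DESTROY_BIND tears down the binding on the
	 * device; the cached sgid_tbl entry is cleared only on success.
	 */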

	/* Update sgid table. */
	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_dest, 0, sizeof(*cmd_dest));
	cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND;
	memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16);
	cmd_dest->index = index;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not destroy binding, error: %d\n", ret);
		return ret;
	}
	memset(&dev->sgid_tbl[index], 0, 16);
	return 0;
}

static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context)
{
	struct pvrdma_dev *dev = to_vdev(attr->device);

	dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s",
		attr->index, dev->netdev->name);

	return pvrdma_del_gid_at_index(dev, attr->index);
}

static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
					  struct net_device *ndev,
					  unsigned long event)
{
	struct pci_dev *pdev_net;
	unsigned int slot;

	switch (event) {
	case NETDEV_REBOOT:
		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
		break;
	case NETDEV_UNREGISTER:
		ib_device_set_netdev(&dev->ib_dev, NULL, 1);
		dev_put(dev->netdev);
		dev->netdev = NULL;
		break;
	case NETDEV_REGISTER:
		/* vmxnet3 will have same bus, slot. But func will be 0 */
		slot = PCI_SLOT(dev->pdev->devfn);
		pdev_net = pci_get_slot(dev->pdev->bus,
					PCI_DEVFN(slot, 0));
		if ((dev->netdev == NULL) &&
		    (pci_get_drvdata(pdev_net) == ndev)) {
			/* this is our netdev */
			ib_device_set_netdev(&dev->ib_dev, ndev, 1);
			dev->netdev = ndev;
			dev_hold(ndev);
		}
		pci_dev_put(pdev_net);
		break;

	default:
		dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
			event, dev_name(&dev->ib_dev.dev));
		break;
	}
}

static void pvrdma_netdevice_event_work(struct work_struct *work)
{
	struct pvrdma_netdevice_work *netdev_work;
	struct pvrdma_dev *dev;

	netdev_work = container_of(work, struct pvrdma_netdevice_work, work);

	mutex_lock(&pvrdma_device_list_lock);
	list_for_each_entry(dev, &pvrdma_device_list, device_link) {
		if ((netdev_work->event == NETDEV_REGISTER) ||
		    (dev->netdev == netdev_work->event_netdev)) {
			pvrdma_netdevice_event_handle(dev,
						      netdev_work->event_netdev,
						      netdev_work->event);
			break;
		}
	}
	mutex_unlock(&pvrdma_device_list_lock);

	kfree(netdev_work);
}

static int pvrdma_netdevice_event(struct notifier_block *this,
				  unsigned long event, void *ptr)
{
	struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr);
	struct pvrdma_netdevice_work *netdev_work;

	netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC);
	if (!netdev_work)
		return NOTIFY_BAD;

	INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work);
	netdev_work->event_netdev = event_netdev;
	netdev_work->event = event;
	queue_work(event_wq, &netdev_work->work);

	return NOTIFY_DONE;
}

static int pvrdma_pci_probe(struct pci_dev *pdev,
			    const struct pci_device_id *id)
{
	struct pci_dev *pdev_net;
	struct pvrdma_dev *dev;
	int ret;
	unsigned long start;
	unsigned long len;
	dma_addr_t slot_dma = 0;

	dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev));

	/* Allocate a zeroed-out device */
	dev = ib_alloc_device(pvrdma_dev, ib_dev);
	if (!dev) {
		dev_err(&pdev->dev, "failed to allocate IB device\n");
		return -ENOMEM;
	}

	mutex_lock(&pvrdma_device_list_lock);
	list_add(&dev->device_link, &pvrdma_device_list);
	mutex_unlock(&pvrdma_device_list_lock);

	ret = pvrdma_init_device(dev);
	if (ret)
		goto err_free_device;

	dev->pdev = pdev;
	pci_set_drvdata(pdev, dev);

	ret = pci_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "cannot enable PCI device\n");
		goto err_free_device;
	}

	dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n",
		pci_resource_flags(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n",
		pci_resource_flags(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 1));

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
	    !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
		ret = -ENOMEM;
		goto err_disable_pdev;
	}

	ret = pci_request_regions(pdev, DRV_NAME);
	if (ret) {
		dev_err(&pdev->dev, "cannot request PCI resources\n");
		goto err_disable_pdev;
	}

	/* Enable 64-Bit DMA */
	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (ret) {
		dev_err(&pdev->dev, "dma_set_mask failed\n");
		goto err_free_resource;
	}
	dma_set_max_seg_size(&pdev->dev, UINT_MAX);
	pci_set_master(pdev);

	/* Map register space */
	start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	dev->regs = ioremap(start, len);
	if (!dev->regs) {
		dev_err(&pdev->dev, "register mapping failed\n");
		ret = -ENOMEM;
		goto err_free_resource;
	}

	/* Setup per-device UAR. */
	dev->driver_uar.index = 0;
	dev->driver_uar.pfn =
		pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
		PAGE_SHIFT;
	dev->driver_uar.map =
		ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
	if (!dev->driver_uar.map) {
		dev_err(&pdev->dev, "failed to remap UAR pages\n");
		ret = -ENOMEM;
		goto err_unmap_regs;
	}

	dev->dsr_version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION);
	dev_info(&pdev->dev, "device version %d, driver version %d\n",
		 dev->dsr_version, PVRDMA_VERSION);

	dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr),
				      &dev->dsrbase, GFP_KERNEL);
	if (!dev->dsr) {
		dev_err(&pdev->dev, "failed to allocate shared region\n");
		ret = -ENOMEM;
		goto err_uar_unmap;
	}

	/* Setup the shared region */
	dev->dsr->driver_version = PVRDMA_VERSION;
	dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ?
		PVRDMA_GOS_BITS_32 :
		PVRDMA_GOS_BITS_64;
	dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX;
	dev->dsr->gos_info.gos_ver = 1;

	if (dev->dsr_version < PVRDMA_PPN64_VERSION)
		dev->dsr->uar_pfn = dev->driver_uar.pfn;
	else
		dev->dsr->uar_pfn64 = dev->driver_uar.pfn;

	/* Command slot. */
	dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					   &slot_dma, GFP_KERNEL);
	if (!dev->cmd_slot) {
		ret = -ENOMEM;
		goto err_free_dsr;
	}

	dev->dsr->cmd_slot_dma = (u64)slot_dma;

	/* Response slot. */
	dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					    &slot_dma, GFP_KERNEL);
	if (!dev->resp_slot) {
		ret = -ENOMEM;
		goto err_free_slots;
	}

	dev->dsr->resp_slot_dma = (u64)slot_dma;

	/* Async event ring */
	dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
	ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
				   dev->dsr->async_ring_pages.num_pages, true);
	if (ret)
		goto err_free_slots;
	dev->async_ring_state = dev->async_pdir.pages[0];
	dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;

	/* CQ notification ring */
	dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
	ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
				   dev->dsr->cq_ring_pages.num_pages, true);
	if (ret)
		goto err_free_async_ring;
	dev->cq_ring_state = dev->cq_pdir.pages[0];
	dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma;

	/*
	 * Write the PA of the shared region to the device. The writes must be
	 * ordered such that the high bits are written last. When the writes
	 * complete, the device will have filled out the capabilities.
	 */

	pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase);
	pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH,
			 (u32)((u64)(dev->dsrbase) >> 32));

	/* Make sure the write is complete before reading status. */
	mb();

	/* The driver supports RoCE V1 and V2. */
	if (!PVRDMA_SUPPORTED(dev)) {
		dev_err(&pdev->dev, "driver needs RoCE v1 or v2 support\n");
		ret = -EFAULT;
		goto err_free_cq_ring;
	}

	/* Paired vmxnet3 will have same bus, slot. But func will be 0 */
	pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
	if (!pdev_net) {
		dev_err(&pdev->dev, "failed to find paired net device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE ||
	    pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) {
		dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n");
		pci_dev_put(pdev_net);
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	dev->netdev = pci_get_drvdata(pdev_net);
	pci_dev_put(pdev_net);
	if (!dev->netdev) {
		dev_err(&pdev->dev, "failed to get vmxnet3 device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}
	dev_hold(dev->netdev);

	dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);

	/* Interrupt setup */
	ret = pvrdma_alloc_intrs(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate interrupts\n");
		ret = -ENOMEM;
		goto err_free_cq_ring;
	}

	/* Allocate UAR table. */
	ret = pvrdma_uar_table_init(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate UAR table\n");
		ret = -ENOMEM;
		goto err_free_intrs;
	}

	/* Allocate GID table */
	dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len,
				sizeof(union ib_gid), GFP_KERNEL);
	if (!dev->sgid_tbl) {
		ret = -ENOMEM;
		goto err_free_uar_table;
	}
	dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);

	pvrdma_enable_intrs(dev);

	/* Activate pvrdma device */
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);

	/* Make sure the write is complete before reading status. */
	mb();

	/* Check if device was successfully activated */
	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
	if (ret != 0) {
		dev_err(&pdev->dev, "failed to activate device\n");
		ret = -EFAULT;
		goto err_disable_intr;
	}

	/* Register IB device */
	ret = pvrdma_register_device(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register IB device\n");
		goto err_disable_intr;
	}

	dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
	ret = register_netdevice_notifier(&dev->nb_netdev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register netdevice events\n");
		goto err_unreg_ibdev;
	}

	dev_info(&pdev->dev, "attached to device\n");
	return 0;

err_unreg_ibdev:
	ib_unregister_device(&dev->ib_dev);
err_disable_intr:
	pvrdma_disable_intrs(dev);
	kfree(dev->sgid_tbl);
err_free_uar_table:
	pvrdma_uar_table_cleanup(dev);
err_free_intrs:
	pvrdma_free_irq(dev);
	pci_free_irq_vectors(pdev);
err_free_cq_ring:
	dev_put(dev->netdev);
	dev->netdev = NULL;
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
err_free_async_ring:
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
err_free_slots:
	pvrdma_free_slots(dev);
err_free_dsr:
	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
			  dev->dsrbase);
err_uar_unmap:
	iounmap(dev->driver_uar.map);
err_unmap_regs:
	iounmap(dev->regs);
err_free_resource:
	pci_release_regions(pdev);
err_disable_pdev:
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
err_free_device:
	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);
	ib_dealloc_device(&dev->ib_dev);
	return ret;
}

static void pvrdma_pci_remove(struct pci_dev *pdev)
{
	struct pvrdma_dev *dev = pci_get_drvdata(pdev);

	if (!dev)
		return;

	dev_info(&pdev->dev, "detaching from device\n");

	unregister_netdevice_notifier(&dev->nb_netdev);
	dev->nb_netdev.notifier_call = NULL;

	flush_workqueue(event_wq);

	dev_put(dev->netdev);
	dev->netdev = NULL;

	/* Unregister ib device */
	ib_unregister_device(&dev->ib_dev);

	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);

	pvrdma_disable_intrs(dev);
	pvrdma_free_irq(dev);
	pci_free_irq_vectors(pdev);

	/* Deactivate pvrdma device */
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
	pvrdma_free_slots(dev);
	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
			  dev->dsrbase);

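	/*
	 * Release the remaining resources: unmap the register BAR, free the
	 * resource tables, clean up the UAR table and unmap the UAR page,
	 * drop the ib_device, and finally release the PCI device.
	 */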
	iounmap(dev->regs);
	kfree(dev->sgid_tbl);
	kfree(dev->cq_tbl);
	kfree(dev->srq_tbl);
	kfree(dev->qp_tbl);
	pvrdma_uar_table_cleanup(dev);
	iounmap(dev->driver_uar.map);

	ib_dealloc_device(&dev->ib_dev);

	/* Free pci resources */
	pci_release_regions(pdev);
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
}

static const struct pci_device_id pvrdma_pci_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), },
	{ 0 },
};

MODULE_DEVICE_TABLE(pci, pvrdma_pci_table);

static struct pci_driver pvrdma_driver = {
	.name		= DRV_NAME,
	.id_table	= pvrdma_pci_table,
	.probe		= pvrdma_pci_probe,
	.remove		= pvrdma_pci_remove,
};

static int __init pvrdma_init(void)
{
	int err;

	event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM);
	if (!event_wq)
		return -ENOMEM;

	err = pci_register_driver(&pvrdma_driver);
	if (err)
		destroy_workqueue(event_wq);

	return err;
}

static void __exit pvrdma_cleanup(void)
{
	pci_unregister_driver(&pvrdma_driver);

	destroy_workqueue(event_wq);
}

module_init(pvrdma_init);
module_exit(pvrdma_cleanup);

MODULE_AUTHOR("VMware, Inc");
MODULE_DESCRIPTION("VMware Paravirtual RDMA driver");
MODULE_LICENSE("Dual BSD/GPL");