1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */ 4 /* Kai Shen <kaishen@linux.alibaba.com> */ 5 /* Copyright (c) 2020-2022, Alibaba Group. */ 6 7 #include <linux/module.h> 8 #include <net/addrconf.h> 9 #include <rdma/erdma-abi.h> 10 11 #include "erdma.h" 12 #include "erdma_cm.h" 13 #include "erdma_verbs.h" 14 15 MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>"); 16 MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver"); 17 MODULE_LICENSE("Dual BSD/GPL"); 18 19 static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, 20 void *arg) 21 { 22 struct net_device *netdev = netdev_notifier_info_to_dev(arg); 23 struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb); 24 25 if (dev->netdev == NULL || dev->netdev != netdev) 26 goto done; 27 28 switch (event) { 29 case NETDEV_UP: 30 dev->state = IB_PORT_ACTIVE; 31 erdma_port_event(dev, IB_EVENT_PORT_ACTIVE); 32 break; 33 case NETDEV_DOWN: 34 dev->state = IB_PORT_DOWN; 35 erdma_port_event(dev, IB_EVENT_PORT_ERR); 36 break; 37 case NETDEV_CHANGEMTU: 38 if (dev->mtu != netdev->mtu) { 39 erdma_set_mtu(dev, netdev->mtu); 40 dev->mtu = netdev->mtu; 41 } 42 break; 43 case NETDEV_REGISTER: 44 case NETDEV_UNREGISTER: 45 case NETDEV_CHANGEADDR: 46 case NETDEV_GOING_DOWN: 47 case NETDEV_CHANGE: 48 default: 49 break; 50 } 51 52 done: 53 return NOTIFY_OK; 54 } 55 56 static int erdma_enum_and_get_netdev(struct erdma_dev *dev) 57 { 58 struct net_device *netdev; 59 int ret = -EPROBE_DEFER; 60 61 /* Already binded to a net_device, so we skip. */ 62 if (dev->netdev) 63 return 0; 64 65 rtnl_lock(); 66 for_each_netdev(&init_net, netdev) { 67 /* 68 * In erdma, the paired netdev and ibdev should have the same 69 * MAC address. erdma can get the value from its PCIe bar 70 * registers. Since erdma can not get the paired netdev 71 * reference directly, we do a traverse here to get the paired 72 * netdev. 73 */ 74 if (ether_addr_equal_unaligned(netdev->perm_addr, 75 dev->attrs.peer_addr)) { 76 ret = ib_device_set_netdev(&dev->ibdev, netdev, 1); 77 if (ret) { 78 rtnl_unlock(); 79 ibdev_warn(&dev->ibdev, 80 "failed (%d) to link netdev", ret); 81 return ret; 82 } 83 84 dev->netdev = netdev; 85 break; 86 } 87 } 88 89 rtnl_unlock(); 90 91 return ret; 92 } 93 94 static int erdma_device_register(struct erdma_dev *dev) 95 { 96 struct ib_device *ibdev = &dev->ibdev; 97 int ret; 98 99 ret = erdma_enum_and_get_netdev(dev); 100 if (ret) 101 return ret; 102 103 dev->mtu = dev->netdev->mtu; 104 addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr); 105 106 ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev); 107 if (ret) { 108 dev_err(&dev->pdev->dev, 109 "ib_register_device failed: ret = %d\n", ret); 110 return ret; 111 } 112 113 dev->netdev_nb.notifier_call = erdma_netdev_event; 114 ret = register_netdevice_notifier(&dev->netdev_nb); 115 if (ret) { 116 ibdev_err(&dev->ibdev, "failed to register notifier.\n"); 117 ib_unregister_device(ibdev); 118 } 119 120 return ret; 121 } 122 123 static irqreturn_t erdma_comm_irq_handler(int irq, void *data) 124 { 125 struct erdma_dev *dev = data; 126 127 erdma_cmdq_completion_handler(&dev->cmdq); 128 erdma_aeq_event_handler(dev); 129 130 return IRQ_HANDLED; 131 } 132 133 static int erdma_request_vectors(struct erdma_dev *dev) 134 { 135 int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC); 136 int ret; 137 138 ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX); 139 if (ret < 0) { 140 dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n", 141 ret); 142 return ret; 143 } 144 dev->attrs.irq_num = ret; 145 146 return 0; 147 } 148 149 static int erdma_comm_irq_init(struct erdma_dev *dev) 150 { 151 snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s", 152 pci_name(dev->pdev)); 153 dev->comm_irq.msix_vector = 154 pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ); 155 156 cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)), 157 &dev->comm_irq.affinity_hint_mask); 158 irq_set_affinity_hint(dev->comm_irq.msix_vector, 159 &dev->comm_irq.affinity_hint_mask); 160 161 return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0, 162 dev->comm_irq.name, dev); 163 } 164 165 static void erdma_comm_irq_uninit(struct erdma_dev *dev) 166 { 167 irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL); 168 free_irq(dev->comm_irq.msix_vector, dev); 169 } 170 171 static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev) 172 { 173 int ret; 174 175 dev->resp_pool = dma_pool_create("erdma_resp_pool", &pdev->dev, 176 ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE, 177 0); 178 if (!dev->resp_pool) 179 return -ENOMEM; 180 181 ret = dma_set_mask_and_coherent(&pdev->dev, 182 DMA_BIT_MASK(ERDMA_PCI_WIDTH)); 183 if (ret) 184 goto destroy_pool; 185 186 dma_set_max_seg_size(&pdev->dev, UINT_MAX); 187 188 return 0; 189 190 destroy_pool: 191 dma_pool_destroy(dev->resp_pool); 192 193 return ret; 194 } 195 196 static void erdma_device_uninit(struct erdma_dev *dev) 197 { 198 dma_pool_destroy(dev->resp_pool); 199 } 200 201 static void erdma_hw_reset(struct erdma_dev *dev) 202 { 203 u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1); 204 205 erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl); 206 } 207 208 static int erdma_wait_hw_init_done(struct erdma_dev *dev) 209 { 210 int i; 211 212 erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, 213 FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1)); 214 215 for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) { 216 if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG, 217 ERDMA_REG_DEV_ST_INIT_DONE_MASK)) 218 break; 219 220 msleep(ERDMA_REG_ACCESS_WAIT_MS); 221 } 222 223 if (i == ERDMA_WAIT_DEV_DONE_CNT) { 224 dev_err(&dev->pdev->dev, "wait init done failed.\n"); 225 return -ETIMEDOUT; 226 } 227 228 return 0; 229 } 230 231 static const struct pci_device_id erdma_pci_tbl[] = { 232 { PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) }, 233 {} 234 }; 235 236 static int erdma_probe_dev(struct pci_dev *pdev) 237 { 238 struct erdma_dev *dev; 239 int bars, err; 240 u32 version; 241 242 err = pci_enable_device(pdev); 243 if (err) { 244 dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err); 245 return err; 246 } 247 248 pci_set_master(pdev); 249 250 dev = ib_alloc_device(erdma_dev, ibdev); 251 if (!dev) { 252 dev_err(&pdev->dev, "ib_alloc_device failed\n"); 253 err = -ENOMEM; 254 goto err_disable_device; 255 } 256 257 pci_set_drvdata(pdev, dev); 258 dev->pdev = pdev; 259 dev->attrs.numa_node = dev_to_node(&pdev->dev); 260 261 bars = pci_select_bars(pdev, IORESOURCE_MEM); 262 err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); 263 if (bars != ERDMA_BAR_MASK || err) { 264 err = err ? err : -EINVAL; 265 goto err_ib_device_release; 266 } 267 268 dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR); 269 dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR); 270 271 dev->func_bar = 272 devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len); 273 if (!dev->func_bar) { 274 dev_err(&pdev->dev, "devm_ioremap failed.\n"); 275 err = -EFAULT; 276 goto err_release_bars; 277 } 278 279 version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG); 280 if (version == 0) { 281 /* we knows that it is a non-functional function. */ 282 err = -ENODEV; 283 goto err_iounmap_func_bar; 284 } 285 286 err = erdma_device_init(dev, pdev); 287 if (err) 288 goto err_iounmap_func_bar; 289 290 err = erdma_request_vectors(dev); 291 if (err) 292 goto err_uninit_device; 293 294 err = erdma_comm_irq_init(dev); 295 if (err) 296 goto err_free_vectors; 297 298 err = erdma_aeq_init(dev); 299 if (err) 300 goto err_uninit_comm_irq; 301 302 err = erdma_cmdq_init(dev); 303 if (err) 304 goto err_uninit_aeq; 305 306 err = erdma_wait_hw_init_done(dev); 307 if (err) 308 goto err_uninit_cmdq; 309 310 err = erdma_ceqs_init(dev); 311 if (err) 312 goto err_reset_hw; 313 314 erdma_finish_cmdq_init(dev); 315 316 return 0; 317 318 err_reset_hw: 319 erdma_hw_reset(dev); 320 321 err_uninit_cmdq: 322 erdma_cmdq_destroy(dev); 323 324 err_uninit_aeq: 325 erdma_aeq_destroy(dev); 326 327 err_uninit_comm_irq: 328 erdma_comm_irq_uninit(dev); 329 330 err_free_vectors: 331 pci_free_irq_vectors(dev->pdev); 332 333 err_uninit_device: 334 erdma_device_uninit(dev); 335 336 err_iounmap_func_bar: 337 devm_iounmap(&pdev->dev, dev->func_bar); 338 339 err_release_bars: 340 pci_release_selected_regions(pdev, bars); 341 342 err_ib_device_release: 343 ib_dealloc_device(&dev->ibdev); 344 345 err_disable_device: 346 pci_disable_device(pdev); 347 348 return err; 349 } 350 351 static void erdma_remove_dev(struct pci_dev *pdev) 352 { 353 struct erdma_dev *dev = pci_get_drvdata(pdev); 354 355 erdma_ceqs_uninit(dev); 356 erdma_hw_reset(dev); 357 erdma_cmdq_destroy(dev); 358 erdma_aeq_destroy(dev); 359 erdma_comm_irq_uninit(dev); 360 pci_free_irq_vectors(dev->pdev); 361 erdma_device_uninit(dev); 362 363 devm_iounmap(&pdev->dev, dev->func_bar); 364 pci_release_selected_regions(pdev, ERDMA_BAR_MASK); 365 366 ib_dealloc_device(&dev->ibdev); 367 368 pci_disable_device(pdev); 369 } 370 371 #define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap) 372 373 static int erdma_dev_attrs_init(struct erdma_dev *dev) 374 { 375 int err; 376 u64 req_hdr, cap0, cap1; 377 378 erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA, 379 CMDQ_OPCODE_QUERY_DEVICE); 380 381 err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0, 382 &cap1); 383 if (err) 384 return err; 385 386 dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0); 387 dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0); 388 dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1); 389 dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0); 390 dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1); 391 dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1); 392 dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1); 393 dev->attrs.max_mr = dev->attrs.max_qp << 1; 394 dev->attrs.max_cq = dev->attrs.max_qp << 1; 395 dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0); 396 397 dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR; 398 dev->attrs.max_ord = ERDMA_MAX_ORD; 399 dev->attrs.max_ird = ERDMA_MAX_IRD; 400 dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE; 401 dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE; 402 dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD; 403 dev->attrs.max_pd = ERDMA_MAX_PD; 404 405 dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD; 406 dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr; 407 408 erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON, 409 CMDQ_OPCODE_QUERY_FW_INFO); 410 411 err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0, 412 &cap1); 413 if (!err) 414 dev->attrs.fw_version = 415 FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0); 416 417 return err; 418 } 419 420 static int erdma_device_config(struct erdma_dev *dev) 421 { 422 struct erdma_cmdq_config_device_req req = {}; 423 424 if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_EXTEND_DB)) 425 return 0; 426 427 erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, 428 CMDQ_OPCODE_CONF_DEVICE); 429 430 req.cfg = FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK, PAGE_SHIFT) | 431 FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK, 1); 432 433 return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); 434 } 435 436 static int erdma_res_cb_init(struct erdma_dev *dev) 437 { 438 int i, j; 439 440 for (i = 0; i < ERDMA_RES_CNT; i++) { 441 dev->res_cb[i].next_alloc_idx = 1; 442 spin_lock_init(&dev->res_cb[i].lock); 443 dev->res_cb[i].bitmap = 444 bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL); 445 if (!dev->res_cb[i].bitmap) 446 goto err; 447 } 448 449 return 0; 450 451 err: 452 for (j = 0; j < i; j++) 453 bitmap_free(dev->res_cb[j].bitmap); 454 455 return -ENOMEM; 456 } 457 458 static void erdma_res_cb_free(struct erdma_dev *dev) 459 { 460 int i; 461 462 for (i = 0; i < ERDMA_RES_CNT; i++) 463 bitmap_free(dev->res_cb[i].bitmap); 464 } 465 466 static const struct ib_device_ops erdma_device_ops = { 467 .owner = THIS_MODULE, 468 .driver_id = RDMA_DRIVER_ERDMA, 469 .uverbs_abi_ver = ERDMA_ABI_VERSION, 470 471 .alloc_hw_port_stats = erdma_alloc_hw_port_stats, 472 .alloc_mr = erdma_ib_alloc_mr, 473 .alloc_pd = erdma_alloc_pd, 474 .alloc_ucontext = erdma_alloc_ucontext, 475 .create_cq = erdma_create_cq, 476 .create_qp = erdma_create_qp, 477 .dealloc_pd = erdma_dealloc_pd, 478 .dealloc_ucontext = erdma_dealloc_ucontext, 479 .dereg_mr = erdma_dereg_mr, 480 .destroy_cq = erdma_destroy_cq, 481 .destroy_qp = erdma_destroy_qp, 482 .get_dma_mr = erdma_get_dma_mr, 483 .get_hw_stats = erdma_get_hw_stats, 484 .get_port_immutable = erdma_get_port_immutable, 485 .iw_accept = erdma_accept, 486 .iw_add_ref = erdma_qp_get_ref, 487 .iw_connect = erdma_connect, 488 .iw_create_listen = erdma_create_listen, 489 .iw_destroy_listen = erdma_destroy_listen, 490 .iw_get_qp = erdma_get_ibqp, 491 .iw_reject = erdma_reject, 492 .iw_rem_ref = erdma_qp_put_ref, 493 .map_mr_sg = erdma_map_mr_sg, 494 .mmap = erdma_mmap, 495 .mmap_free = erdma_mmap_free, 496 .modify_qp = erdma_modify_qp, 497 .post_recv = erdma_post_recv, 498 .post_send = erdma_post_send, 499 .poll_cq = erdma_poll_cq, 500 .query_device = erdma_query_device, 501 .query_gid = erdma_query_gid, 502 .query_port = erdma_query_port, 503 .query_qp = erdma_query_qp, 504 .req_notify_cq = erdma_req_notify_cq, 505 .reg_user_mr = erdma_reg_user_mr, 506 507 INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq), 508 INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd), 509 INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext), 510 INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp), 511 }; 512 513 static int erdma_ib_device_add(struct pci_dev *pdev) 514 { 515 struct erdma_dev *dev = pci_get_drvdata(pdev); 516 struct ib_device *ibdev = &dev->ibdev; 517 u64 mac; 518 int ret; 519 520 ret = erdma_dev_attrs_init(dev); 521 if (ret) 522 return ret; 523 524 ret = erdma_device_config(dev); 525 if (ret) 526 return ret; 527 528 ibdev->node_type = RDMA_NODE_RNIC; 529 memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC)); 530 531 /* 532 * Current model (one-to-one device association): 533 * One ERDMA device per net_device or, equivalently, 534 * per physical port. 535 */ 536 ibdev->phys_port_cnt = 1; 537 ibdev->num_comp_vectors = dev->attrs.irq_num - 1; 538 539 ib_set_device_ops(ibdev, &erdma_device_ops); 540 541 INIT_LIST_HEAD(&dev->cep_list); 542 543 spin_lock_init(&dev->lock); 544 xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1); 545 xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1); 546 dev->next_alloc_cqn = 1; 547 dev->next_alloc_qpn = 1; 548 549 ret = erdma_res_cb_init(dev); 550 if (ret) 551 return ret; 552 553 atomic_set(&dev->num_ctx, 0); 554 555 mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG); 556 mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32; 557 558 u64_to_ether_addr(mac, dev->attrs.peer_addr); 559 560 dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND, 561 WQ_UNBOUND_MAX_ACTIVE); 562 if (!dev->reflush_wq) { 563 ret = -ENOMEM; 564 goto err_alloc_workqueue; 565 } 566 567 ret = erdma_device_register(dev); 568 if (ret) 569 goto err_register; 570 571 return 0; 572 573 err_register: 574 destroy_workqueue(dev->reflush_wq); 575 err_alloc_workqueue: 576 xa_destroy(&dev->qp_xa); 577 xa_destroy(&dev->cq_xa); 578 579 erdma_res_cb_free(dev); 580 581 return ret; 582 } 583 584 static void erdma_ib_device_remove(struct pci_dev *pdev) 585 { 586 struct erdma_dev *dev = pci_get_drvdata(pdev); 587 588 unregister_netdevice_notifier(&dev->netdev_nb); 589 ib_unregister_device(&dev->ibdev); 590 591 destroy_workqueue(dev->reflush_wq); 592 erdma_res_cb_free(dev); 593 xa_destroy(&dev->qp_xa); 594 xa_destroy(&dev->cq_xa); 595 } 596 597 static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 598 { 599 int ret; 600 601 ret = erdma_probe_dev(pdev); 602 if (ret) 603 return ret; 604 605 ret = erdma_ib_device_add(pdev); 606 if (ret) { 607 erdma_remove_dev(pdev); 608 return ret; 609 } 610 611 return 0; 612 } 613 614 static void erdma_remove(struct pci_dev *pdev) 615 { 616 erdma_ib_device_remove(pdev); 617 erdma_remove_dev(pdev); 618 } 619 620 static struct pci_driver erdma_pci_driver = { 621 .name = DRV_MODULE_NAME, 622 .id_table = erdma_pci_tbl, 623 .probe = erdma_probe, 624 .remove = erdma_remove 625 }; 626 627 MODULE_DEVICE_TABLE(pci, erdma_pci_tbl); 628 629 static __init int erdma_init_module(void) 630 { 631 int ret; 632 633 ret = erdma_cm_init(); 634 if (ret) 635 return ret; 636 637 ret = pci_register_driver(&erdma_pci_driver); 638 if (ret) 639 erdma_cm_exit(); 640 641 return ret; 642 } 643 644 static void __exit erdma_exit_module(void) 645 { 646 pci_unregister_driver(&erdma_pci_driver); 647 648 erdma_cm_exit(); 649 } 650 651 module_init(erdma_init_module); 652 module_exit(erdma_exit_module); 653