xref: /linux/drivers/infiniband/hw/erdma/erdma_main.c (revision 061834624c87282c6d9d8c5395aaff4380e5e1fc)
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 
3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
4 /*          Kai Shen <kaishen@linux.alibaba.com> */
5 /* Copyright (c) 2020-2022, Alibaba Group. */
6 
7 #include <linux/errno.h>
8 #include <linux/init.h>
9 #include <linux/kernel.h>
10 #include <linux/list.h>
11 #include <linux/module.h>
12 #include <linux/netdevice.h>
13 #include <linux/pci.h>
14 #include <net/addrconf.h>
15 #include <rdma/erdma-abi.h>
16 #include <rdma/ib_verbs.h>
17 #include <rdma/ib_user_verbs.h>
18 
19 #include "erdma.h"
20 #include "erdma_cm.h"
21 #include "erdma_hw.h"
22 #include "erdma_verbs.h"
23 
24 MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>");
25 MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver");
26 MODULE_LICENSE("Dual BSD/GPL");
27 
28 static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
29 			      void *arg)
30 {
31 	struct net_device *netdev = netdev_notifier_info_to_dev(arg);
32 	struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb);
33 
34 	if (dev->netdev == NULL || dev->netdev != netdev)
35 		goto done;
36 
37 	switch (event) {
38 	case NETDEV_UP:
39 		dev->state = IB_PORT_ACTIVE;
40 		erdma_port_event(dev, IB_EVENT_PORT_ACTIVE);
41 		break;
42 	case NETDEV_DOWN:
43 		dev->state = IB_PORT_DOWN;
44 		erdma_port_event(dev, IB_EVENT_PORT_ERR);
45 		break;
46 	case NETDEV_REGISTER:
47 	case NETDEV_UNREGISTER:
48 	case NETDEV_CHANGEADDR:
49 	case NETDEV_CHANGEMTU:
50 	case NETDEV_GOING_DOWN:
51 	case NETDEV_CHANGE:
52 	default:
53 		break;
54 	}
55 
56 done:
57 	return NOTIFY_OK;
58 }
59 
60 static int erdma_enum_and_get_netdev(struct erdma_dev *dev)
61 {
62 	struct net_device *netdev;
63 	int ret = -ENODEV;
64 
65 	/* Already binded to a net_device, so we skip. */
66 	if (dev->netdev)
67 		return 0;
68 
69 	rtnl_lock();
70 	for_each_netdev(&init_net, netdev) {
71 		/*
72 		 * In erdma, the paired netdev and ibdev should have the same
73 		 * MAC address. erdma can get the value from its PCIe bar
74 		 * registers. Since erdma can not get the paired netdev
75 		 * reference directly, we do a traverse here to get the paired
76 		 * netdev.
77 		 */
78 		if (ether_addr_equal_unaligned(netdev->perm_addr,
79 					       dev->attrs.peer_addr)) {
80 			ret = ib_device_set_netdev(&dev->ibdev, netdev, 1);
81 			if (ret) {
82 				rtnl_unlock();
83 				ibdev_warn(&dev->ibdev,
84 					   "failed (%d) to link netdev", ret);
85 				return ret;
86 			}
87 
88 			dev->netdev = netdev;
89 			break;
90 		}
91 	}
92 
93 	rtnl_unlock();
94 
95 	return ret;
96 }
97 
98 static int erdma_device_register(struct erdma_dev *dev)
99 {
100 	struct ib_device *ibdev = &dev->ibdev;
101 	int ret;
102 
103 	ret = erdma_enum_and_get_netdev(dev);
104 	if (ret)
105 		return ret;
106 
107 	addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr);
108 
109 	ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev);
110 	if (ret) {
111 		dev_err(&dev->pdev->dev,
112 			"ib_register_device failed: ret = %d\n", ret);
113 		return ret;
114 	}
115 
116 	dev->netdev_nb.notifier_call = erdma_netdev_event;
117 	ret = register_netdevice_notifier(&dev->netdev_nb);
118 	if (ret) {
119 		ibdev_err(&dev->ibdev, "failed to register notifier.\n");
120 		ib_unregister_device(ibdev);
121 	}
122 
123 	return ret;
124 }
125 
126 static irqreturn_t erdma_comm_irq_handler(int irq, void *data)
127 {
128 	struct erdma_dev *dev = data;
129 
130 	erdma_cmdq_completion_handler(&dev->cmdq);
131 	erdma_aeq_event_handler(dev);
132 
133 	return IRQ_HANDLED;
134 }
135 
136 static void erdma_dwqe_resource_init(struct erdma_dev *dev)
137 {
138 	int total_pages, type0, type1;
139 
140 	dev->attrs.grp_num = erdma_reg_read32(dev, ERDMA_REGS_GRP_NUM_REG);
141 
142 	if (dev->attrs.grp_num < 4)
143 		dev->attrs.disable_dwqe = true;
144 	else
145 		dev->attrs.disable_dwqe = false;
146 
147 	/* One page contains 4 goups. */
148 	total_pages = dev->attrs.grp_num * 4;
149 
150 	if (dev->attrs.grp_num >= ERDMA_DWQE_MAX_GRP_CNT) {
151 		dev->attrs.grp_num = ERDMA_DWQE_MAX_GRP_CNT;
152 		type0 = ERDMA_DWQE_TYPE0_CNT;
153 		type1 = ERDMA_DWQE_TYPE1_CNT / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
154 	} else {
155 		type1 = total_pages / 3;
156 		type0 = total_pages - type1 - 1;
157 	}
158 
159 	dev->attrs.dwqe_pages = type0;
160 	dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
161 }
162 
163 static int erdma_request_vectors(struct erdma_dev *dev)
164 {
165 	int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC);
166 	int ret;
167 
168 	ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX);
169 	if (ret < 0) {
170 		dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n",
171 			ret);
172 		return ret;
173 	}
174 	dev->attrs.irq_num = ret;
175 
176 	return 0;
177 }
178 
179 static int erdma_comm_irq_init(struct erdma_dev *dev)
180 {
181 	snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s",
182 		 pci_name(dev->pdev));
183 	dev->comm_irq.msix_vector =
184 		pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ);
185 
186 	cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)),
187 			&dev->comm_irq.affinity_hint_mask);
188 	irq_set_affinity_hint(dev->comm_irq.msix_vector,
189 			      &dev->comm_irq.affinity_hint_mask);
190 
191 	return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0,
192 			   dev->comm_irq.name, dev);
193 }
194 
195 static void erdma_comm_irq_uninit(struct erdma_dev *dev)
196 {
197 	irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL);
198 	free_irq(dev->comm_irq.msix_vector, dev);
199 }
200 
201 static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
202 {
203 	int ret;
204 
205 	erdma_dwqe_resource_init(dev);
206 
207 	ret = dma_set_mask_and_coherent(&pdev->dev,
208 					DMA_BIT_MASK(ERDMA_PCI_WIDTH));
209 	if (ret)
210 		return ret;
211 
212 	dma_set_max_seg_size(&pdev->dev, UINT_MAX);
213 
214 	return 0;
215 }
216 
217 static void erdma_device_uninit(struct erdma_dev *dev)
218 {
219 	u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);
220 
221 	erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
222 }
223 
224 static const struct pci_device_id erdma_pci_tbl[] = {
225 	{ PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
226 	{}
227 };
228 
229 static int erdma_probe_dev(struct pci_dev *pdev)
230 {
231 	struct erdma_dev *dev;
232 	int bars, err;
233 	u32 version;
234 
235 	err = pci_enable_device(pdev);
236 	if (err) {
237 		dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err);
238 		return err;
239 	}
240 
241 	pci_set_master(pdev);
242 
243 	dev = ib_alloc_device(erdma_dev, ibdev);
244 	if (!dev) {
245 		dev_err(&pdev->dev, "ib_alloc_device failed\n");
246 		err = -ENOMEM;
247 		goto err_disable_device;
248 	}
249 
250 	pci_set_drvdata(pdev, dev);
251 	dev->pdev = pdev;
252 	dev->attrs.numa_node = dev_to_node(&pdev->dev);
253 
254 	bars = pci_select_bars(pdev, IORESOURCE_MEM);
255 	err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
256 	if (bars != ERDMA_BAR_MASK || err) {
257 		err = err ? err : -EINVAL;
258 		goto err_ib_device_release;
259 	}
260 
261 	dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR);
262 	dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR);
263 
264 	dev->func_bar =
265 		devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len);
266 	if (!dev->func_bar) {
267 		dev_err(&pdev->dev, "devm_ioremap failed.\n");
268 		err = -EFAULT;
269 		goto err_release_bars;
270 	}
271 
272 	version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
273 	if (version == 0) {
274 		/* we knows that it is a non-functional function. */
275 		err = -ENODEV;
276 		goto err_iounmap_func_bar;
277 	}
278 
279 	err = erdma_device_init(dev, pdev);
280 	if (err)
281 		goto err_iounmap_func_bar;
282 
283 	err = erdma_request_vectors(dev);
284 	if (err)
285 		goto err_iounmap_func_bar;
286 
287 	err = erdma_comm_irq_init(dev);
288 	if (err)
289 		goto err_free_vectors;
290 
291 	err = erdma_aeq_init(dev);
292 	if (err)
293 		goto err_uninit_comm_irq;
294 
295 	err = erdma_cmdq_init(dev);
296 	if (err)
297 		goto err_uninit_aeq;
298 
299 	err = erdma_ceqs_init(dev);
300 	if (err)
301 		goto err_uninit_cmdq;
302 
303 	erdma_finish_cmdq_init(dev);
304 
305 	return 0;
306 
307 err_uninit_cmdq:
308 	erdma_device_uninit(dev);
309 	erdma_cmdq_destroy(dev);
310 
311 err_uninit_aeq:
312 	erdma_aeq_destroy(dev);
313 
314 err_uninit_comm_irq:
315 	erdma_comm_irq_uninit(dev);
316 
317 err_free_vectors:
318 	pci_free_irq_vectors(dev->pdev);
319 
320 err_iounmap_func_bar:
321 	devm_iounmap(&pdev->dev, dev->func_bar);
322 
323 err_release_bars:
324 	pci_release_selected_regions(pdev, bars);
325 
326 err_ib_device_release:
327 	ib_dealloc_device(&dev->ibdev);
328 
329 err_disable_device:
330 	pci_disable_device(pdev);
331 
332 	return err;
333 }
334 
335 static void erdma_remove_dev(struct pci_dev *pdev)
336 {
337 	struct erdma_dev *dev = pci_get_drvdata(pdev);
338 
339 	erdma_ceqs_uninit(dev);
340 
341 	erdma_device_uninit(dev);
342 
343 	erdma_cmdq_destroy(dev);
344 	erdma_aeq_destroy(dev);
345 	erdma_comm_irq_uninit(dev);
346 	pci_free_irq_vectors(dev->pdev);
347 
348 	devm_iounmap(&pdev->dev, dev->func_bar);
349 	pci_release_selected_regions(pdev, ERDMA_BAR_MASK);
350 
351 	ib_dealloc_device(&dev->ibdev);
352 
353 	pci_disable_device(pdev);
354 }
355 
356 #define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap)
357 
358 static int erdma_dev_attrs_init(struct erdma_dev *dev)
359 {
360 	int err;
361 	u64 req_hdr, cap0, cap1;
362 
363 	erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA,
364 				CMDQ_OPCODE_QUERY_DEVICE);
365 
366 	err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
367 				  &cap1);
368 	if (err)
369 		return err;
370 
371 	dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0);
372 	dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
373 	dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
374 	dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
375 	dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
376 	dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
377 	dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
378 	dev->attrs.max_mr = dev->attrs.max_qp << 1;
379 	dev->attrs.max_cq = dev->attrs.max_qp << 1;
380 
381 	dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
382 	dev->attrs.max_ord = ERDMA_MAX_ORD;
383 	dev->attrs.max_ird = ERDMA_MAX_IRD;
384 	dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE;
385 	dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE;
386 	dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD;
387 	dev->attrs.max_pd = ERDMA_MAX_PD;
388 
389 	dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
390 	dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;
391 
392 	erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
393 				CMDQ_OPCODE_QUERY_FW_INFO);
394 
395 	err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
396 				  &cap1);
397 	if (!err)
398 		dev->attrs.fw_version =
399 			FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);
400 
401 	return err;
402 }
403 
404 static int erdma_res_cb_init(struct erdma_dev *dev)
405 {
406 	int i, j;
407 
408 	for (i = 0; i < ERDMA_RES_CNT; i++) {
409 		dev->res_cb[i].next_alloc_idx = 1;
410 		spin_lock_init(&dev->res_cb[i].lock);
411 		dev->res_cb[i].bitmap =
412 			bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL);
413 		if (!dev->res_cb[i].bitmap)
414 			goto err;
415 	}
416 
417 	return 0;
418 
419 err:
420 	for (j = 0; j < i; j++)
421 		bitmap_free(dev->res_cb[j].bitmap);
422 
423 	return -ENOMEM;
424 }
425 
426 static void erdma_res_cb_free(struct erdma_dev *dev)
427 {
428 	int i;
429 
430 	for (i = 0; i < ERDMA_RES_CNT; i++)
431 		bitmap_free(dev->res_cb[i].bitmap);
432 }
433 
434 static const struct ib_device_ops erdma_device_ops = {
435 	.owner = THIS_MODULE,
436 	.driver_id = RDMA_DRIVER_ERDMA,
437 	.uverbs_abi_ver = ERDMA_ABI_VERSION,
438 
439 	.alloc_mr = erdma_ib_alloc_mr,
440 	.alloc_pd = erdma_alloc_pd,
441 	.alloc_ucontext = erdma_alloc_ucontext,
442 	.create_cq = erdma_create_cq,
443 	.create_qp = erdma_create_qp,
444 	.dealloc_pd = erdma_dealloc_pd,
445 	.dealloc_ucontext = erdma_dealloc_ucontext,
446 	.dereg_mr = erdma_dereg_mr,
447 	.destroy_cq = erdma_destroy_cq,
448 	.destroy_qp = erdma_destroy_qp,
449 	.get_dma_mr = erdma_get_dma_mr,
450 	.get_port_immutable = erdma_get_port_immutable,
451 	.iw_accept = erdma_accept,
452 	.iw_add_ref = erdma_qp_get_ref,
453 	.iw_connect = erdma_connect,
454 	.iw_create_listen = erdma_create_listen,
455 	.iw_destroy_listen = erdma_destroy_listen,
456 	.iw_get_qp = erdma_get_ibqp,
457 	.iw_reject = erdma_reject,
458 	.iw_rem_ref = erdma_qp_put_ref,
459 	.map_mr_sg = erdma_map_mr_sg,
460 	.mmap = erdma_mmap,
461 	.mmap_free = erdma_mmap_free,
462 	.modify_qp = erdma_modify_qp,
463 	.post_recv = erdma_post_recv,
464 	.post_send = erdma_post_send,
465 	.poll_cq = erdma_poll_cq,
466 	.query_device = erdma_query_device,
467 	.query_gid = erdma_query_gid,
468 	.query_port = erdma_query_port,
469 	.query_qp = erdma_query_qp,
470 	.req_notify_cq = erdma_req_notify_cq,
471 	.reg_user_mr = erdma_reg_user_mr,
472 
473 	INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
474 	INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
475 	INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext),
476 	INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp),
477 };
478 
479 static int erdma_ib_device_add(struct pci_dev *pdev)
480 {
481 	struct erdma_dev *dev = pci_get_drvdata(pdev);
482 	struct ib_device *ibdev = &dev->ibdev;
483 	u64 mac;
484 	int ret;
485 
486 	ret = erdma_dev_attrs_init(dev);
487 	if (ret)
488 		return ret;
489 
490 	ibdev->node_type = RDMA_NODE_RNIC;
491 	memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
492 
493 	/*
494 	 * Current model (one-to-one device association):
495 	 * One ERDMA device per net_device or, equivalently,
496 	 * per physical port.
497 	 */
498 	ibdev->phys_port_cnt = 1;
499 	ibdev->num_comp_vectors = dev->attrs.irq_num - 1;
500 
501 	ib_set_device_ops(ibdev, &erdma_device_ops);
502 
503 	INIT_LIST_HEAD(&dev->cep_list);
504 
505 	spin_lock_init(&dev->lock);
506 	xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1);
507 	xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1);
508 	dev->next_alloc_cqn = 1;
509 	dev->next_alloc_qpn = 1;
510 
511 	ret = erdma_res_cb_init(dev);
512 	if (ret)
513 		return ret;
514 
515 	spin_lock_init(&dev->db_bitmap_lock);
516 	bitmap_zero(dev->sdb_page, ERDMA_DWQE_TYPE0_CNT);
517 	bitmap_zero(dev->sdb_entry, ERDMA_DWQE_TYPE1_CNT);
518 
519 	atomic_set(&dev->num_ctx, 0);
520 
521 	mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG);
522 	mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32;
523 
524 	u64_to_ether_addr(mac, dev->attrs.peer_addr);
525 
526 	ret = erdma_device_register(dev);
527 	if (ret)
528 		goto err_out;
529 
530 	return 0;
531 
532 err_out:
533 	xa_destroy(&dev->qp_xa);
534 	xa_destroy(&dev->cq_xa);
535 
536 	erdma_res_cb_free(dev);
537 
538 	return ret;
539 }
540 
541 static void erdma_ib_device_remove(struct pci_dev *pdev)
542 {
543 	struct erdma_dev *dev = pci_get_drvdata(pdev);
544 
545 	unregister_netdevice_notifier(&dev->netdev_nb);
546 	ib_unregister_device(&dev->ibdev);
547 
548 	erdma_res_cb_free(dev);
549 	xa_destroy(&dev->qp_xa);
550 	xa_destroy(&dev->cq_xa);
551 }
552 
/*
 * PCI probe callback: bring up the PCI function, then add the ib_device.
 * If the second step fails, the PCI-level setup is torn down again.
 *
 * Returns 0 on success or a negative errno.
 */
static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int err = erdma_probe_dev(pdev);

	if (err)
		return err;

	err = erdma_ib_device_add(pdev);
	if (err)
		erdma_remove_dev(pdev);

	return err;
}
569 
/*
 * PCI remove callback: remove the ib_device first, then tear down the
 * PCI-level resources.
 */
static void erdma_remove(struct pci_dev *pdev)
{
	erdma_ib_device_remove(pdev);

	erdma_remove_dev(pdev);
}
575 
576 static struct pci_driver erdma_pci_driver = {
577 	.name = DRV_MODULE_NAME,
578 	.id_table = erdma_pci_tbl,
579 	.probe = erdma_probe,
580 	.remove = erdma_remove
581 };
582 
583 MODULE_DEVICE_TABLE(pci, erdma_pci_tbl);
584 
585 static __init int erdma_init_module(void)
586 {
587 	int ret;
588 
589 	ret = erdma_cm_init();
590 	if (ret)
591 		return ret;
592 
593 	ret = pci_register_driver(&erdma_pci_driver);
594 	if (ret)
595 		erdma_cm_exit();
596 
597 	return ret;
598 }
599 
600 static void __exit erdma_exit_module(void)
601 {
602 	pci_unregister_driver(&erdma_pci_driver);
603 
604 	erdma_cm_exit();
605 }
606 
/* Register the module entry and exit points. */
module_init(erdma_init_module);
module_exit(erdma_exit_module);
609