xref: /freebsd/sys/dev/irdma/icrdma.c (revision 3a3af6b2a160bea72509a9d5ef84e25906b0478a)
1 /*-
2  * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
3  *
4  * Copyright (c) 2021 - 2022 Intel Corporation
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenFabrics.org BSD license below:
11  *
12  *   Redistribution and use in source and binary forms, with or
13  *   without modification, are permitted provided that the following
14  *   conditions are met:
15  *
16  *    - Redistributions of source code must retain the above
17  *	copyright notice, this list of conditions and the following
18  *	disclaimer.
19  *
20  *    - Redistributions in binary form must reproduce the above
21  *	copyright notice, this list of conditions and the following
22  *	disclaimer in the documentation and/or other materials
23  *	provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 /*$FreeBSD$*/
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/bus.h>
39 #include <sys/kernel.h>
40 #include <sys/module.h>
41 #include <sys/sysctl.h>
42 #include <machine/bus.h>
43 #include <linux/device.h>
44 #include <sys/rman.h>
45 
46 #include "ice_rdma.h"
47 #include "irdma_main.h"
48 #include "icrdma_hw.h"
49 
50 #include "irdma_if.h"
51 #include "irdma_di_if.h"
52 
/**
 *  Driver version string; printed in the probe banner.
 */
char irdma_driver_version[] = "0.0.51-k";

/*
 * Unit number of the network interface attached to an RDMA peer.
 *
 * Both the argument and the whole expansion are parenthesized so the macro
 * expands correctly for any pointer expression (e.g. "peers + 1") and inside
 * any surrounding expression.
 */
#define pf_if_d(peer) ((peer)->ifp->if_dunit)
59 
60 /**
61  * irdma_init_tunable - prepare tunables
62  * @rf: RDMA PCI function
63  * @pf_id: id of the pf
64  */
65 static void
66 irdma_init_tunable(struct irdma_pci_f *rf, uint8_t pf_id)
67 {
68 	struct sysctl_oid_list *irdma_sysctl_oid_list;
69 	char pf_name[16];
70 
71 	snprintf(pf_name, 15, "irdma%d", pf_id);
72 	sysctl_ctx_init(&rf->tun_info.irdma_sysctl_ctx);
73 
74 	rf->tun_info.irdma_sysctl_tree = SYSCTL_ADD_NODE(&rf->tun_info.irdma_sysctl_ctx,
75 							 SYSCTL_STATIC_CHILDREN(_dev),
76 							 OID_AUTO, pf_name, CTLFLAG_RD,
77 							 NULL, "");
78 
79 	irdma_sysctl_oid_list = SYSCTL_CHILDREN(rf->tun_info.irdma_sysctl_tree);
80 
81 	/*
82 	 * debug mask setting
83 	 */
84 	SYSCTL_ADD_S32(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list,
85 		       OID_AUTO, "debug", CTLFLAG_RWTUN, &rf->sc_dev.debug_mask,
86 		       0, "irdma debug");
87 
88 	/*
89 	 * RoCEv2/iWARP setting RoCEv2 the default mode
90 	 */
91 	rf->tun_info.roce_ena = 1;
92 	SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO,
93 		      "roce_enable", CTLFLAG_RDTUN, &rf->tun_info.roce_ena, 0,
94 		      "RoCEv2 mode enable");
95 
96 	rf->protocol_used = IRDMA_IWARP_PROTOCOL_ONLY;
97 	if (rf->tun_info.roce_ena == 1)
98 		rf->protocol_used = IRDMA_ROCE_PROTOCOL_ONLY;
99 	else if (rf->tun_info.roce_ena != 0)
100 		printf("%s:%d wrong roce_enable value (%d), using iWARP\n",
101 		       __func__, __LINE__, rf->tun_info.roce_ena);
102 	printf("%s:%d protocol: %s, roce_enable value: %d\n", __func__, __LINE__,
103 	       (rf->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) ? "iWARP" : "RoCEv2",
104 	       rf->tun_info.roce_ena);
105 
106 	irdma_dcqcn_tunables_init(rf);
107 }
108 
109 /**
110  * irdma_find_handler - obtain hdl object to identify pf
111  * @p_dev: the peer interface structure
112  */
113 static struct irdma_handler *
114 irdma_find_handler(struct ice_rdma_peer *p_dev)
115 {
116 	struct irdma_handler *hdl;
117 	unsigned long flags;
118 
119 	spin_lock_irqsave(&irdma_handler_lock, flags);
120 	list_for_each_entry(hdl, &irdma_handlers, list) {
121 		if (!hdl)
122 			continue;
123 		if (!hdl->iwdev->rf->peer_info)
124 			continue;
125 		if (hdl->iwdev->rf->peer_info->dev == p_dev->dev) {
126 			spin_unlock_irqrestore(&irdma_handler_lock, flags);
127 			return hdl;
128 		}
129 	}
130 	spin_unlock_irqrestore(&irdma_handler_lock, flags);
131 
132 	return NULL;
133 }
134 
135 /**
136  * peer_to_iwdev - return iwdev based on peer
137  * @peer: the peer interface structure
138  */
139 static struct irdma_device *
140 peer_to_iwdev(struct ice_rdma_peer *peer)
141 {
142 	struct irdma_handler *hdl;
143 
144 	hdl = irdma_find_handler(peer);
145 	if (!hdl) {
146 		printf("%s:%d rdma handler not found\n", __func__, __LINE__);
147 		return NULL;
148 	}
149 
150 	return hdl->iwdev;
151 }
152 
153 /**
154  * irdma_get_qos_info - save qos info from parameters to internal struct
155  * @l2params: destination, qos, tc, mtu info structure
156  * @qos_info: source, DCB settings structure
157  */
158 static void
159 irdma_get_qos_info(struct irdma_l2params *l2params, struct ice_qos_params *qos_info)
160 {
161 	int i;
162 
163 	l2params->num_tc = qos_info->num_tc;
164 	l2params->num_apps = qos_info->num_apps;
165 	l2params->vsi_prio_type = qos_info->vsi_priority_type;
166 	l2params->vsi_rel_bw = qos_info->vsi_relative_bw;
167 	for (i = 0; i < l2params->num_tc; i++) {
168 		l2params->tc_info[i].egress_virt_up =
169 		    qos_info->tc_info[i].egress_virt_up;
170 		l2params->tc_info[i].ingress_virt_up =
171 		    qos_info->tc_info[i].ingress_virt_up;
172 		l2params->tc_info[i].prio_type = qos_info->tc_info[i].prio_type;
173 		l2params->tc_info[i].rel_bw = qos_info->tc_info[i].rel_bw;
174 		l2params->tc_info[i].tc_ctx = qos_info->tc_info[i].tc_ctx;
175 	}
176 	for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++)
177 		l2params->up2tc[i] = qos_info->up2tc[i];
178 
179 	if (qos_info->pfc_mode == IRDMA_QOS_MODE_DSCP) {
180 		l2params->dscp_mode = true;
181 		memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map));
182 	}
183 	printf("%s:%d: l2params settings:\n num_tc %d,\n num_apps %d,\n",
184 	       __func__, __LINE__, l2params->num_tc, l2params->num_apps);
185 	printf(" vsi_prio_type %d,\n vsi_rel_bw %d,\n egress_virt_up:",
186 	       l2params->vsi_prio_type, l2params->vsi_rel_bw);
187 	for (i = 0; i < l2params->num_tc; i++)
188 		printf(" %d", l2params->tc_info[i].egress_virt_up);
189 	printf("\n ingress_virt_up:");
190 	for (i = 0; i < l2params->num_tc; i++)
191 		printf(" %d", l2params->tc_info[i].ingress_virt_up);
192 	printf("\n prio_type:");
193 	for (i = 0; i < l2params->num_tc; i++)
194 		printf(" %d", l2params->tc_info[i].prio_type);
195 	printf("\n rel_bw:");
196 	for (i = 0; i < l2params->num_tc; i++)
197 		printf(" %d", l2params->tc_info[i].rel_bw);
198 	printf("\n tc_ctx:");
199 	for (i = 0; i < l2params->num_tc; i++)
200 		printf(" %lu", l2params->tc_info[i].tc_ctx);
201 	printf("\n up2tc:");
202 	for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++)
203 		printf(" %d", l2params->up2tc[i]);
204 	printf(" dscp_mode: %d,\n", l2params->dscp_mode);
205 	for (i = 0; i < IRDMA_DSCP_NUM_VAL; i++)
206 		printf(" %d", l2params->dscp_map[i]);
207 	printf("\n");
208 
209 	dump_struct(l2params, sizeof(*l2params), "l2params");
210 }
211 
212 /**
213  * irdma_log_invalid_mtu - check mtu setting validity
214  * @mtu: mtu value
215  * @dev: hardware control device structure
216  */
217 static void
218 irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev)
219 {
220 	if (mtu < IRDMA_MIN_MTU_IPV4)
221 		irdma_dev_warn(dev, "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 576 for IPv4\n", mtu);
222 	else if (mtu < IRDMA_MIN_MTU_IPV6)
223 		irdma_dev_warn(dev, "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 1280 for IPv6\\n", mtu);
224 }
225 
226 /**
227  * irdma_event_handler - handling events from lan driver
228  * @peer: the peer interface structure
229  * @event: event info structure
230  */
231 static void
232 irdma_event_handler(struct ice_rdma_peer *peer, struct ice_rdma_event *event)
233 {
234 	struct irdma_device *iwdev;
235 	struct irdma_l2params l2params = {};
236 
237 	printf("%s:%d event_handler %s (%x) on pf %d (%d)\n", __func__, __LINE__,
238 	       (event->type == 1) ? "LINK CHANGE" :
239 	       (event->type == 2) ? "MTU CHANGE" :
240 	       (event->type == 3) ? "TC CHANGE" : "UNKNOWN",
241 	       event->type, peer->pf_id, pf_if_d(peer));
242 	iwdev = peer_to_iwdev(peer);
243 	if (!iwdev) {
244 		printf("%s:%d rdma device not found\n", __func__, __LINE__);
245 		return;
246 	}
247 
248 	switch (event->type) {
249 	case ICE_RDMA_EVENT_LINK_CHANGE:
250 		printf("%s:%d PF: %x (%x), state: %d, speed: %lu\n", __func__, __LINE__,
251 		       peer->pf_id, pf_if_d(peer), event->linkstate, event->baudrate);
252 		break;
253 	case ICE_RDMA_EVENT_MTU_CHANGE:
254 		if (iwdev->vsi.mtu != event->mtu) {
255 			l2params.mtu = event->mtu;
256 			l2params.mtu_changed = true;
257 			irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev);
258 			irdma_change_l2params(&iwdev->vsi, &l2params);
259 		}
260 		break;
261 	case ICE_RDMA_EVENT_TC_CHANGE:
262 		/*
263 		 * 1. check if it is pre or post 2. check if it is currently being done
264 		 */
265 		if (event->prep == iwdev->vsi.tc_change_pending) {
266 			printf("%s:%d can't process %s TC change if TC change is %spending\n",
267 			       __func__, __LINE__,
268 			       event->prep ? "pre" : "post",
269 			       event->prep ? " " : "not ");
270 			goto done;
271 		}
272 		if (event->prep) {
273 			iwdev->vsi.tc_change_pending = true;
274 			irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_SUSPEND);
275 			wait_event_timeout(iwdev->suspend_wq,
276 					   !atomic_read(&iwdev->vsi.qp_suspend_reqs),
277 					   IRDMA_EVENT_TIMEOUT_MS * 10);
278 			irdma_ws_reset(&iwdev->vsi);
279 			printf("%s:%d TC change preparation done\n", __func__, __LINE__);
280 		} else {
281 			l2params.tc_changed = true;
282 			irdma_get_qos_info(&l2params, &event->port_qos);
283 			if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
284 				iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode;
285 
286 			irdma_check_fc_for_tc_update(&iwdev->vsi, &l2params);
287 			irdma_change_l2params(&iwdev->vsi, &l2params);
288 			printf("%s:%d TC change done\n", __func__, __LINE__);
289 		}
290 		break;
291 	case ICE_RDMA_EVENT_CRIT_ERR:
292 		printf("%s:%d event type received: %d\n", __func__, __LINE__, event->type);
293 		break;
294 	default:
295 		printf("%s:%d event type unsupported: %d\n", __func__, __LINE__, event->type);
296 	}
297 done:
298 	return;
299 }
300 
301 /**
302  * irdma_link_change - Callback for link state change
303  * @peer: the peer interface structure
304  * @linkstate: state of the link
305  * @baudrate: speed of the link
306  */
307 static void
308 irdma_link_change(struct ice_rdma_peer *peer, int linkstate, uint64_t baudrate)
309 {
310 	printf("%s:%d PF: %x (%x), state: %d, speed: %lu\n", __func__, __LINE__,
311 	       peer->pf_id, pf_if_d(peer), linkstate, baudrate);
312 }
313 
314 /**
315  * irdma_finalize_task - Finish open or close phase in a separate thread
316  * @context: instance holding peer and iwdev information
317  *
318  * Triggered from irdma_open or irdma_close to perform rt_init_hw or
319  * rt_deinit_hw respectively. Does registration and unregistration of
320  * the device.
321  */
322 static void
323 irdma_finalize_task(void *context, int pending)
324 {
325 	struct irdma_task_arg *task_arg = (struct irdma_task_arg *)context;
326 	struct irdma_device *iwdev = task_arg->iwdev;
327 	struct irdma_pci_f *rf = iwdev->rf;
328 	struct ice_rdma_peer *peer = task_arg->peer;
329 	struct irdma_l2params l2params = {{{0}}};
330 	struct ice_rdma_request req = {0};
331 	int status = 0;
332 
333 	if (iwdev->iw_status) {
334 		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT, "Starting deferred closing %d (%d)\n",
335 			    rf->peer_info->pf_id, pf_if_d(peer));
336 		irdma_dereg_ipaddr_event_cb(rf);
337 		irdma_ib_unregister_device(iwdev);
338 		req.type = ICE_RDMA_EVENT_VSI_FILTER_UPDATE;
339 		req.enable_filter = false;
340 		IRDMA_DI_REQ_HANDLER(peer, &req);
341 		irdma_rt_deinit_hw(iwdev);
342 	} else {
343 		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT, "Starting deferred opening %d (%d)\n",
344 			    rf->peer_info->pf_id, pf_if_d(peer));
345 		l2params.mtu = peer->mtu;
346 		irdma_get_qos_info(&l2params, &peer->initial_qos_info);
347 		if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
348 			iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode;
349 
350 		status = irdma_rt_init_hw(iwdev, &l2params);
351 		if (status) {
352 			irdma_pr_err("RT init failed %d\n", status);
353 			ib_dealloc_device(&iwdev->ibdev);
354 			return;
355 		}
356 		status = irdma_ib_register_device(iwdev);
357 		if (status) {
358 			irdma_pr_err("Registration failed %d\n", status);
359 			irdma_rt_deinit_hw(iwdev);
360 			ib_dealloc_device(&iwdev->ibdev);
361 		}
362 		req.type = ICE_RDMA_EVENT_VSI_FILTER_UPDATE;
363 		req.enable_filter = true;
364 		IRDMA_DI_REQ_HANDLER(peer, &req);
365 		irdma_reg_ipaddr_event_cb(rf);
366 		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT, "Deferred opening finished %d (%d)\n",
367 			    rf->peer_info->pf_id, pf_if_d(peer));
368 	}
369 }
370 
371 /**
372  * irdma_open - Callback for operation open for RDMA device
373  * @peer: the new peer interface structure
374  *
375  * Callback implementing the RDMA_OPEN function. Called by the ice driver to
376  * notify the RDMA client driver that a new device has been initialized.
377  */
378 static int
379 irdma_open(struct ice_rdma_peer *peer)
380 {
381 	struct ice_rdma_event event = {0};
382 
383 	event.type = ICE_RDMA_EVENT_MTU_CHANGE;
384 	event.mtu = peer->mtu;
385 
386 	irdma_event_handler(peer, &event);
387 
388 	return 0;
389 }
390 
/**
 * irdma_close - Callback to notify that a peer device is down
 * @peer: the RDMA peer device being stopped
 *
 * Callback implementing the RDMA_CLOSE function. Called by the ice driver to
 * notify the RDMA client driver that a peer device is being stopped.
 *
 * Intentionally a no-op that always returns 0.
 */
static int
irdma_close(struct ice_rdma_peer *peer)
{
	/*
	 * Invoked on "ifconfig down".  Nothing has to be done here; the
	 * callback is kept for compatibility with ice and may become useful
	 * in the future.
	 */
	return (0);
}
407 
408 /**
409  * irdma_alloc_pcidev - allocate memory for pcidev and populate data
410  * @peer: the new peer interface structure
411  * @rf: RDMA PCI function
412  */
413 static int
414 irdma_alloc_pcidev(struct ice_rdma_peer *peer, struct irdma_pci_f *rf)
415 {
416 	rf->pcidev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL);
417 	if (!rf->pcidev) {
418 		return -ENOMEM;
419 	}
420 	if (linux_pci_attach_device(rf->dev_ctx.dev, NULL, NULL, rf->pcidev))
421 		return -ENOMEM;
422 
423 	return 0;
424 }
425 
426 /**
427  * irdma_dealloc_pcidev - deallocate memory for pcidev
428  * @rf: RDMA PCI function
429  */
430 static void
431 irdma_dealloc_pcidev(struct irdma_pci_f *rf)
432 {
433 	linux_pci_detach_device(rf->pcidev);
434 	kfree(rf->pcidev);
435 }
436 
437 /**
438  * irdma_fill_device_info - assign initial values to rf variables
439  * @iwdev: irdma device
440  * @peer: the peer interface structure
441  */
442 static void
443 irdma_fill_device_info(struct irdma_device *iwdev,
444 		       struct ice_rdma_peer *peer)
445 {
446 	struct irdma_pci_f *rf = iwdev->rf;
447 
448 	rf->peer_info = peer;
449 	rf->gen_ops.register_qset = irdma_register_qset;
450 	rf->gen_ops.unregister_qset = irdma_unregister_qset;
451 
452 	rf->rdma_ver = IRDMA_GEN_2;
453 	rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_2;
454 	rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT;
455 	rf->rst_to = IRDMA_RST_TIMEOUT_HZ;
456 	rf->check_fc = irdma_check_fc_for_qp;
457 	irdma_set_rf_user_cfg_params(rf);
458 
459 	rf->default_vsi.vsi_idx = peer->pf_vsi_num;
460 	rf->dev_ctx.dev = peer->dev;
461 	rf->dev_ctx.mem_bus_space_tag = rman_get_bustag(peer->pci_mem);
462 	rf->dev_ctx.mem_bus_space_handle = rman_get_bushandle(peer->pci_mem);
463 	rf->dev_ctx.mem_bus_space_size = rman_get_size(peer->pci_mem);
464 
465 	rf->hw.dev_context = &rf->dev_ctx;
466 	rf->hw.hw_addr = (u8 *)rman_get_virtual(peer->pci_mem);
467 	rf->msix_count = peer->msix.count;
468 	rf->msix_info.entry = peer->msix.base;
469 	rf->msix_info.vector = peer->msix.count;
470 	printf("%s:%d msix_info: %d %d %d\n", __func__, __LINE__,
471 	       rf->msix_count, rf->msix_info.entry, rf->msix_info.vector);
472 
473 	rf->iwdev = iwdev;
474 	iwdev->netdev = peer->ifp;
475 	iwdev->init_state = INITIAL_STATE;
476 	iwdev->vsi_num = peer->pf_vsi_num;
477 	iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED;
478 	iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
479 	iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT;
480 	iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT;
481 
482 	if (rf->protocol_used == IRDMA_ROCE_PROTOCOL_ONLY) {
483 		iwdev->roce_mode = true;
484 	}
485 }
486 
487 /**
488  * irdma_probe - Callback to probe a new RDMA peer device
489  * @peer: the new peer interface structure
490  *
491  * Callback implementing the RDMA_PROBE function. Called by the ice driver to
492  * notify the RDMA client driver that a new device has been created
493  */
494 static int
495 irdma_probe(struct ice_rdma_peer *peer)
496 {
497 	struct irdma_device *iwdev;
498 	struct irdma_pci_f *rf;
499 	struct irdma_handler *hdl;
500 	int err = 0;
501 
502 	irdma_pr_info("probe: irdma-%s peer=%p, peer->pf_id=%d, peer->ifp=%p, peer->ifp->if_dunit=%d, peer->pci_mem->r_bustag=%p\n",
503 		      irdma_driver_version, peer, peer->pf_id, peer->ifp,
504 		      pf_if_d(peer), (void *)(uintptr_t)peer->pci_mem->r_bustag);
505 
506 	hdl = irdma_find_handler(peer);
507 	if (hdl)
508 		return -EBUSY;
509 
510 	hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
511 	if (!hdl)
512 		return -ENOMEM;
513 
514 	iwdev = (struct irdma_device *)ib_alloc_device(sizeof(*iwdev));
515 	if (!iwdev) {
516 		kfree(hdl);
517 		return -ENOMEM;
518 	}
519 
520 	iwdev->rf = kzalloc(sizeof(*rf), GFP_KERNEL);
521 	if (!iwdev->rf) {
522 		ib_dealloc_device(&iwdev->ibdev);
523 		kfree(hdl);
524 		return -ENOMEM;
525 	}
526 	hdl->iwdev = iwdev;
527 	iwdev->hdl = hdl;
528 
529 	irdma_init_tunable(iwdev->rf, pf_if_d(peer));
530 	irdma_fill_device_info(iwdev, peer);
531 	rf = iwdev->rf;
532 
533 	if (irdma_alloc_pcidev(peer, rf))
534 		goto err_pcidev;
535 
536 	irdma_add_handler(hdl);
537 
538 	if (irdma_ctrl_init_hw(rf)) {
539 		err = -EIO;
540 		goto err_ctrl_init;
541 	}
542 
543 	rf->dev_ctx.task_arg.peer = peer;
544 	rf->dev_ctx.task_arg.iwdev = iwdev;
545 	rf->dev_ctx.task_arg.peer = peer;
546 
547 	TASK_INIT(&hdl->deferred_task, 0, irdma_finalize_task, &rf->dev_ctx.task_arg);
548 	hdl->deferred_tq = taskqueue_create_fast("irdma_defer",
549 						 M_NOWAIT, taskqueue_thread_enqueue,
550 						 &hdl->deferred_tq);
551 	taskqueue_start_threads(&hdl->deferred_tq, 1, PI_NET, "irdma_defer_t");
552 
553 	taskqueue_enqueue(hdl->deferred_tq, &hdl->deferred_task);
554 
555 	return 0;
556 
557 err_ctrl_init:
558 	irdma_del_handler(hdl);
559 	irdma_dealloc_pcidev(rf);
560 err_pcidev:
561 	kfree(iwdev->rf);
562 	ib_dealloc_device(&iwdev->ibdev);
563 	kfree(hdl);
564 
565 	return err;
566 }
567 
568 /**
569  * irdma_remove - Callback to remove an RDMA peer device
570  * @peer: the new peer interface structure
571  *
572  * Callback implementing the RDMA_REMOVE function. Called by the ice driver to
573  * notify the RDMA client driver that the device wille be delated
574  */
575 static int
576 irdma_remove(struct ice_rdma_peer *peer)
577 {
578 	struct irdma_handler *hdl;
579 	struct irdma_device *iwdev;
580 
581 	irdma_debug((struct irdma_sc_dev *)NULL, IRDMA_DEBUG_INIT, "removing %s\n", __FUNCTION__);
582 
583 	hdl = irdma_find_handler(peer);
584 	if (!hdl)
585 		return 0;
586 
587 	iwdev = hdl->iwdev;
588 
589 	if (iwdev->vsi.tc_change_pending) {
590 		iwdev->vsi.tc_change_pending = false;
591 		irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_RESUME);
592 	}
593 
594 	taskqueue_enqueue(hdl->deferred_tq, &hdl->deferred_task);
595 
596 	taskqueue_drain(hdl->deferred_tq, &hdl->deferred_task);
597 	taskqueue_free(hdl->deferred_tq);
598 	hdl->iwdev->rf->dev_ctx.task_arg.iwdev = NULL;
599 	hdl->iwdev->rf->dev_ctx.task_arg.peer = NULL;
600 
601 	sysctl_ctx_free(&iwdev->rf->tun_info.irdma_sysctl_ctx);
602 	hdl->iwdev->rf->tun_info.irdma_sysctl_tree = NULL;
603 
604 	irdma_ctrl_deinit_hw(iwdev->rf);
605 
606 	irdma_dealloc_pcidev(iwdev->rf);
607 
608 	irdma_del_handler(iwdev->hdl);
609 	kfree(iwdev->hdl);
610 	kfree(iwdev->rf);
611 	ib_dealloc_device(&iwdev->ibdev);
612 	irdma_pr_info("IRDMA hardware deinitialization complete\n");
613 
614 	return 0;
615 }
616 
617 /**
618  * irdma_prep_for_unregister - ensure the driver is ready to unregister
619  */
620 static void
621 irdma_prep_for_unregister(void)
622 {
623 	struct irdma_handler *hdl;
624 	unsigned long flags;
625 	bool hdl_valid;
626 
627 	do {
628 		hdl_valid = false;
629 		spin_lock_irqsave(&irdma_handler_lock, flags);
630 		list_for_each_entry(hdl, &irdma_handlers, list) {
631 			if (!hdl)
632 				continue;
633 			if (!hdl->iwdev->rf->peer_info)
634 				continue;
635 			hdl_valid = true;
636 			break;
637 		}
638 		spin_unlock_irqrestore(&irdma_handler_lock, flags);
639 		if (!hdl || !hdl_valid)
640 			break;
641 		IRDMA_CLOSE(hdl->iwdev->rf->peer_info);
642 		IRDMA_REMOVE(hdl->iwdev->rf->peer_info);
643 	} while (1);
644 }
645 
646 static kobj_method_t irdma_methods[] = {
647 	KOBJMETHOD(irdma_probe, irdma_probe),
648 	    KOBJMETHOD(irdma_open, irdma_open),
649 	    KOBJMETHOD(irdma_close, irdma_close),
650 	    KOBJMETHOD(irdma_remove, irdma_remove),
651 	    KOBJMETHOD(irdma_link_change, irdma_link_change),
652 	    KOBJMETHOD(irdma_event_handler, irdma_event_handler),
653 	    KOBJMETHOD_END
654 };
655 
656 /* declare irdma_class which extends the ice_rdma_di class */
657 DEFINE_CLASS_1(irdma, irdma_class, irdma_methods, sizeof(struct ice_rdma_peer), ice_rdma_di_class);
658 
659 static struct ice_rdma_info irdma_info = {
660 	.major_version = ICE_RDMA_MAJOR_VERSION,
661 	.minor_version = ICE_RDMA_MINOR_VERSION,
662 	.patch_version = ICE_RDMA_PATCH_VERSION,
663 	.rdma_class = &irdma_class,
664 };
665 
666 /**
667  * irdma_module_event_handler - Module event handler callback
668  * @mod: unused mod argument
669  * @what: the module event to handle
670  * @arg: unused module event argument
671  *
672  * Callback used by the FreeBSD module stack to notify the driver of module
673  * events. Used to implement custom handling for certain module events such as
674  * load and unload.
675  */
676 static int
677 irdma_module_event_handler(module_t __unused mod, int what, void __unused * arg)
678 {
679 	switch (what) {
680 	case MOD_LOAD:
681 		printf("Loading irdma module\n");
682 		return ice_rdma_register(&irdma_info);
683 	case MOD_UNLOAD:
684 		printf("Unloading irdma module\n");
685 		irdma_prep_for_unregister();
686 		ice_rdma_unregister();
687 		return (0);
688 	default:
689 		return (EOPNOTSUPP);
690 	}
691 
692 	return (0);
693 }
694 
695 static moduledata_t irdma_moduledata = {
696 	"irdma",
697 	    irdma_module_event_handler,
698 	    NULL
699 };
700 
701 DECLARE_MODULE(irdma, irdma_moduledata, SI_SUB_LAST, SI_ORDER_ANY);
702 MODULE_VERSION(irdma, 1);
703 MODULE_DEPEND(irdma, ice, 1, 1, 1);
704 MODULE_DEPEND(irdma, ibcore, 1, 1, 1);
705