xref: /freebsd/sys/dev/ice/ice_rdma.c (revision 397e83df75e0fcd0d3fcb95ae4d794cb7600fc89)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2024, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file ice_rdma.c
34  * @brief RDMA client driver interface
35  *
36  * Functions to interface with the RDMA client driver, for enabling RDMA
37  * functionality for the ice driver.
38  *
39  * The RDMA client interface is based on a simple kobject interface which is
40  * defined by the irdma_if.m and irdma_di_if.m interfaces.
41  *
42  * The ice device driver provides the irdma_di_if.m interface methods, while
43  * the client RDMA driver provides the irdma_if.m interface methods as an
44  * extension on top of the irdma_di_if kobject.
45  *
46  * The initial connection between drivers is done via the RDMA client driver
47  * calling ice_rdma_register.
48  */
49 
50 #include "ice_iflib.h"
51 #include "ice_rdma_internal.h"
52 
53 #include "irdma_if.h"
54 #include "irdma_di_if.h"
55 
56 /**
57  * @var ice_rdma
58  * @brief global RDMA driver state
59  *
60  * Contains global state the driver uses to connect to a client RDMA interface
61  * driver.
62  */
63 static struct ice_rdma_state ice_rdma;	/* file-wide state: mtx (sx lock), peers (list), registered flag, peer_class */
64 
/*
 * Forward declarations of the file-local helpers defined further down.
 */
static int	ice_rdma_check_version(struct ice_rdma_info *info);
static void	ice_rdma_cp_qos_info(struct ice_hw *hw,
		    struct ice_dcbx_cfg *dcbx_cfg,
		    struct ice_qos_params *qos_info);
static int	ice_rdma_pf_attach_locked(struct ice_softc *sc);
static void	ice_rdma_pf_detach_locked(struct ice_softc *sc);
74 
75 /*
76  * RDMA Device Interface prototypes
77  */
78 static int ice_rdma_pf_reset(struct ice_rdma_peer *peer);
79 static int ice_rdma_pf_msix_init(struct ice_rdma_peer *peer,
80 				 struct ice_rdma_msix_mapping *msix_info);
81 static int ice_rdma_qset_register_request(struct ice_rdma_peer *peer,
82 			     struct ice_rdma_qset_update *res);
83 static int ice_rdma_update_vsi_filter(struct ice_rdma_peer *peer_dev,
84 				      bool enable);
85 static void ice_rdma_request_handler(struct ice_rdma_peer *peer,
86 				     struct ice_rdma_request *req);
87 
88 
89 /**
90  * @var ice_rdma_di_methods
91  * @brief RDMA driver interface methods
92  *
93  * Kobject methods implementing the driver-side interface for the RDMA peer
94  * clients. This method table contains the operations which the client can
95  * request from the driver.
96  *
97  * The client driver will then extend this kobject class with methods that the
98  * driver can request from the client.
99  */
100 static kobj_method_t ice_rdma_di_methods[] = {
101 	KOBJMETHOD(irdma_di_reset, ice_rdma_pf_reset),	/* flag a PF reset request */
102 	KOBJMETHOD(irdma_di_msix_init, ice_rdma_pf_msix_init),	/* stub: currently returns ENOSYS */
103 	KOBJMETHOD(irdma_di_qset_register_request, ice_rdma_qset_register_request),	/* allocate or free RDMA qsets */
104 	KOBJMETHOD(irdma_di_vsi_filter_update, ice_rdma_update_vsi_filter),	/* toggle the iWARP filter on the PF VSI */
105 	KOBJMETHOD(irdma_di_req_handler, ice_rdma_request_handler),	/* dispatch a generic ice_rdma_request */
106 	KOBJMETHOD_END	/* table terminator */
107 };
108 
109 /*
 * Define the ice_rdma_di class which will be extended by the iRDMA driver.
 * The class uses ice_rdma_di_methods as its method table and
 * sizeof(struct ice_rdma_peer) as its size argument; peers in this file are
 * initialized with kobj_init() on the embedded struct ice_rdma_peer.
 */
110 DEFINE_CLASS_0(ice_rdma_di, ice_rdma_di_class, ice_rdma_di_methods, sizeof(struct ice_rdma_peer));
111 
112 /**
113  * ice_rdma_pf_reset - RDMA client interface requested a reset
114  * @peer: the RDMA peer client structure
115  *
116  * Implements IRDMA_DI_RESET, called by the RDMA client driver to request
117  * a reset of an ice driver device.
118  * @return 0 on success
119  */
120 static int
121 ice_rdma_pf_reset(struct ice_rdma_peer *peer)
122 {
123 	struct ice_softc *sc = ice_rdma_peer_to_sc(peer);
124 
125 	/* Tell the base driver that RDMA is requesting a PFR */
126 	ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
127 
128 	/* XXX: Base driver will notify RDMA when it's done */
129 
130 	return (0);
131 }
132 
133 /**
134  * ice_rdma_pf_msix_init - RDMA client interface request MSI-X initialization
135  * @peer: the RDMA peer client structure
136  * @msix_info: requested MSI-X mapping
137  *
138  * Implements IRDMA_DI_MSIX_INIT, called by the RDMA client driver to
139  * initialize the MSI-X resources required for RDMA functionality.
140  * @returns ENOSYS
141  */
142 static int
143 ice_rdma_pf_msix_init(struct ice_rdma_peer *peer,
144 		      struct ice_rdma_msix_mapping __unused *msix_info)
145 {
146 	struct ice_softc *sc = ice_rdma_peer_to_sc(peer);
147 
148 	MPASS(msix_info != NULL);
149 
150 	device_printf(sc->dev, "%s: iRDMA MSI-X initialization request is not yet implemented\n", __func__);
151 
152 	/* TODO: implement MSI-X initialization for RDMA */
153 	return (ENOSYS);
154 }
155 
156 /**
157  * ice_rdma_register_request - RDMA client interface request qset
158  *                             registration or unregistration
159  * @peer: the RDMA peer client structure
160  * @res: resources to be registered or unregistered
161  * @returns 0 on success, EINVAL on argument issues, ENOMEM on memory
162  * allocation failure, EXDEV on vsi device mismatch
163  */
164 static int
165 ice_rdma_qset_register_request(struct ice_rdma_peer *peer, struct ice_rdma_qset_update *res)
166 {
167 	struct ice_softc *sc = ice_rdma_peer_to_sc(peer);
168 	struct ice_vsi *vsi = NULL;
169 	struct ice_dcbx_cfg *dcbx_cfg;
170 	struct ice_hw *hw = &sc->hw;
171 	enum ice_status status;
172 	int count, i, ret = 0;
173 	uint32_t *qset_teid;
174 	uint16_t *qs_handle;
175 	uint16_t max_rdmaqs[ICE_MAX_TRAFFIC_CLASS];
176 	uint16_t vsi_id;
177 	uint8_t ena_tc = 0;
178 
179 	if (!res)
180 		return -EINVAL;
181 
182 	if (res->cnt_req > ICE_MAX_TXQ_PER_TXQG)
183 		return -EINVAL;
184 
185 	switch(res->res_type) {
186 	case ICE_RDMA_QSET_ALLOC:
187 		count = res->cnt_req;
188 		vsi_id = peer->pf_vsi_num;
189 		break;
190 	case ICE_RDMA_QSET_FREE:
191 		count = res->res_allocated;
192 		vsi_id = res->qsets.vsi_id;
193 		break;
194 	default:
195 		return -EINVAL;
196 	}
197 	qset_teid = (uint32_t *)ice_calloc(hw, count, sizeof(*qset_teid));
198 	if (!qset_teid)
199 		return -ENOMEM;
200 
201 	qs_handle = (uint16_t *)ice_calloc(hw, count, sizeof(*qs_handle));
202 	if (!qs_handle) {
203 		ice_free(hw, qset_teid);
204 		return -ENOMEM;
205 	}
206 
207 	ice_for_each_traffic_class(i)
208 		max_rdmaqs[i] = 0;
209 	for (i = 0; i < sc->num_available_vsi; i++) {
210 		if (sc->all_vsi[i] &&
211 		    ice_get_hw_vsi_num(hw, sc->all_vsi[i]->idx) == vsi_id) {
212 			vsi = sc->all_vsi[i];
213 			break;
214 		}
215 	}
216 
217 	if (!vsi) {
218 		ice_debug(hw, ICE_DBG_RDMA, "RDMA QSet invalid VSI\n");
219 		ret = -EINVAL;
220 		goto out;
221 	}
222 	if (sc != vsi->sc) {
223 		ice_debug(hw, ICE_DBG_RDMA, "VSI is tied to unexpected device\n");
224 		ret = -EXDEV;
225 		goto out;
226 	}
227 
228 	for (i = 0; i < count; i++) {
229 		struct ice_rdma_qset_params *qset;
230 
231 		qset = &res->qsets;
232 		if (qset->vsi_id != peer->pf_vsi_num) {
233 			ice_debug(hw, ICE_DBG_RDMA, "RDMA QSet invalid VSI requested %d %d\n",
234 				  qset->vsi_id, peer->pf_vsi_num);
235 			ret = -EINVAL;
236 			goto out;
237 		}
238 		max_rdmaqs[qset->tc]++;
239 		qs_handle[i] = qset->qs_handle;
240 		qset_teid[i] = qset->teid;
241 	}
242 
243 	switch(res->res_type) {
244 	case ICE_RDMA_QSET_ALLOC:
245 		dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
246 		ena_tc = ice_dcb_get_tc_map(dcbx_cfg);
247 
248 		ice_debug(hw, ICE_DBG_RDMA, "%s:%d ena_tc=%x\n", __func__, __LINE__, ena_tc);
249 		status = ice_cfg_vsi_rdma(hw->port_info, vsi->idx, ena_tc,
250 					  max_rdmaqs);
251 		if (status) {
252 			ice_debug(hw, ICE_DBG_RDMA, "Failed VSI RDMA qset config\n");
253 			ret = -EINVAL;
254 			goto out;
255 		}
256 
257 		for (i = 0; i < count; i++) {
258 			struct ice_rdma_qset_params *qset;
259 
260 			qset = &res->qsets;
261 			status = ice_ena_vsi_rdma_qset(hw->port_info, vsi->idx,
262 						       qset->tc, &qs_handle[i], 1,
263 						       &qset_teid[i]);
264 			if (status) {
265 				ice_debug(hw, ICE_DBG_RDMA, "Failed VSI RDMA qset enable\n");
266 				ret = -EINVAL;
267 				goto out;
268 			}
269 			qset->teid = qset_teid[i];
270 		}
271 		break;
272 	case ICE_RDMA_QSET_FREE:
273 		status = ice_dis_vsi_rdma_qset(hw->port_info, count, qset_teid, qs_handle);
274 		if (status)
275 			ret = -EINVAL;
276 		break;
277 	default:
278 		ret = -EINVAL;
279 		break;
280 	}
281 
282 out:
283 	ice_free(hw, qs_handle);
284 	ice_free(hw, qset_teid);
285 
286 	return ret;
287 }
288 
289 /**
290  *  ice_rdma_update_vsi_filter - configure vsi information
291  *                               when opening or closing rdma driver
292  *  @peer: the RDMA peer client structure
293  *  @enable: enable or disable the rdma filter
294  *  @return 0 on success, EINVAL on wrong vsi
295  */
296 static int
297 ice_rdma_update_vsi_filter(struct ice_rdma_peer *peer,
298 			   bool enable)
299 {
300 	struct ice_softc *sc = ice_rdma_peer_to_sc(peer);
301 	struct ice_vsi *vsi;
302 	int ret;
303 
304 	vsi = &sc->pf_vsi;
305 	if (!vsi)
306 		return -EINVAL;
307 
308 	ret = ice_cfg_iwarp_fltr(&sc->hw, vsi->idx, enable);
309 	if (ret) {
310 		device_printf(sc->dev, "Failed to  %sable iWARP filtering\n",
311 				enable ? "en" : "dis");
312 	} else {
313 		if (enable)
314 			vsi->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
315 		else
316 			vsi->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
317 	}
318 
319 	return ret;
320 }
321 
322 /**
323  * ice_rdma_request_handler - handle requests incoming from RDMA driver
324  * @peer: the RDMA peer client structure
325  * @req: structure containing request
326  */
327 static void
328 ice_rdma_request_handler(struct ice_rdma_peer *peer,
329 			 struct ice_rdma_request *req)
330 {
331 	if (!req || !peer) {
332 		log(LOG_WARNING, "%s: peer or req are not valid\n", __func__);
333 		return;
334 	}
335 
336 	switch(req->type) {
337 	case ICE_RDMA_EVENT_RESET:
338 		ice_rdma_pf_reset(peer);
339 		break;
340 	case ICE_RDMA_EVENT_QSET_REGISTER:
341 		ice_rdma_qset_register_request(peer, &req->res);
342 		break;
343 	case ICE_RDMA_EVENT_VSI_FILTER_UPDATE:
344 		ice_rdma_update_vsi_filter(peer, req->enable_filter);
345 		break;
346 	default:
347 		log(LOG_WARNING, "%s: Event %d not supported\n", __func__, req->type);
348 		break;
349 	}
350 }
351 
352 /**
353  * ice_rdma_cp_qos_info - gather current QOS/DCB settings in LAN to pass
354  *                        to RDMA driver
355  * @hw: ice hw structure
356  * @dcbx_cfg: current DCB settings in ice driver
357  * @qos_info: destination of the DCB settings
358  */
359 static void
360 ice_rdma_cp_qos_info(struct ice_hw *hw, struct ice_dcbx_cfg *dcbx_cfg,
361 		     struct ice_qos_params *qos_info)
362 {
363 	u32 up2tc;
364 	u8 j;
365 	u8 num_tc = 0;
366 	u8 val_tc = 0;  /* number of TC for validation */
367 	u8 cnt_tc = 0;
368 
369 	/* setup qos_info fields with defaults */
370 	qos_info->num_apps = 0;
371 	qos_info->num_tc = 1;
372 
373 	for (j = 0; j < ICE_TC_MAX_USER_PRIORITY; j++)
374 		qos_info->up2tc[j] = 0;
375 
376 	qos_info->tc_info[0].rel_bw = 100;
377 	for (j = 1; j < IEEE_8021QAZ_MAX_TCS; j++)
378 		qos_info->tc_info[j].rel_bw = 0;
379 
380 	/* gather current values */
381 	up2tc = rd32(hw, PRTDCB_TUP2TC);
382 	qos_info->num_apps = dcbx_cfg->numapps;
383 
384 	for (j = 0; j < ICE_MAX_TRAFFIC_CLASS; j++) {
385 		num_tc |= BIT(dcbx_cfg->etscfg.prio_table[j]);
386 	}
387 	for (j = 0; j < ICE_MAX_TRAFFIC_CLASS; j++) {
388 		if (num_tc & BIT(j)) {
389 			cnt_tc++;
390 			val_tc |= BIT(j);
391 		} else {
392 			break;
393 		}
394 	}
395 	qos_info->num_tc = (val_tc == num_tc && num_tc != 0) ? cnt_tc : 1;
396 	for (j = 0; j < ICE_TC_MAX_USER_PRIORITY; j++)
397 		qos_info->up2tc[j] = (up2tc >> (j * 3)) & 0x7;
398 
399 	for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++)
400 		qos_info->tc_info[j].rel_bw = dcbx_cfg->etscfg.tcbwtable[j];
401 	for (j = 0; j < qos_info->num_apps; j++) {
402 		qos_info->apps[j].priority = dcbx_cfg->app[j].priority;
403 		qos_info->apps[j].prot_id = dcbx_cfg->app[j].prot_id;
404 		qos_info->apps[j].selector = dcbx_cfg->app[j].selector;
405 	}
406 
407 	/* Gather DSCP-to-TC mapping and QoS/PFC mode */
408 	memcpy(qos_info->dscp_map, dcbx_cfg->dscp_map, sizeof(qos_info->dscp_map));
409 	qos_info->pfc_mode = dcbx_cfg->pfc_mode;
410 }
411 
412 /**
413  * ice_rdma_check_version - Check that the provided RDMA version is compatible
414  * @info: the RDMA client information structure
415  *
416  * Verify that the client RDMA driver provided a version that is compatible
417  * with the driver interface.
418  * @return 0 on success, ENOTSUP when LAN-RDMA interface version doesn't match,
419  * EINVAL on kobject interface fail.
420  */
421 static int
422 ice_rdma_check_version(struct ice_rdma_info *info)
423 {
424 	/* Make sure the MAJOR version matches */
425 	if (info->major_version != ICE_RDMA_MAJOR_VERSION) {
426 		log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports major version %d.x.x\n",
427 		    __func__,
428 		    info->major_version, info->minor_version, info->patch_version,
429 		    ICE_RDMA_MAJOR_VERSION);
430 		return (ENOTSUP);
431 	}
432 
433 	/*
434 	 * Make sure that the MINOR version is compatible.
435 	 *
436 	 * This means that the RDMA client driver version MUST not be greater
437 	 * than the version provided by the driver, as it would indicate that
438 	 * the RDMA client expects features which are not supported by the
439 	 * main driver.
440 	 */
441 	if (info->minor_version > ICE_RDMA_MINOR_VERSION) {
442 		log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports up to minor version %d.%d.x\n",
443 		__func__,
444 		info->major_version, info->minor_version, info->patch_version,
445 		ICE_RDMA_MAJOR_VERSION, ICE_RDMA_MINOR_VERSION);
446 		return (ENOTSUP);
447 	}
448 
449 	/*
450 	 * Make sure that the PATCH version is compatible.
451 	 *
452 	 * This means that the RDMA client version MUST not be greater than
453 	 * the version provided by the driver, as it may indicate that the
454 	 * RDMA client expects certain backwards compatible bug fixes which
455 	 * are not implemented by this version of the main driver.
456 	 */
457 	if ((info->minor_version == ICE_RDMA_MINOR_VERSION) &&
458 	    (info->patch_version > ICE_RDMA_PATCH_VERSION)) {
459 		log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports up to patch version %d.%d.%d\n",
460 		__func__,
461 		info->major_version, info->minor_version, info->patch_version,
462 		ICE_RDMA_MAJOR_VERSION, ICE_RDMA_MINOR_VERSION, ICE_RDMA_PATCH_VERSION);
463 		return (ENOTSUP);
464 	}
465 
466 	/* Make sure that the kobject class is initialized */
467 	if (info->rdma_class == NULL) {
468 		log(LOG_WARNING, "%s: the iRDMA driver did not specify a kobject interface\n",
469 		    __func__);
470 		return (EINVAL);
471 	}
472 
473 	return (0);
474 }
475 
/**
 * ice_rdma_register - Register an RDMA client driver
 * @info: the RDMA client information structure
 *
 * Entry point used by the RDMA client driver to connect to the ice driver.
 * With the ice_rdma lock held, the request is rejected if the interface is
 * disabled, a client is already registered, or the version check fails.
 * Otherwise the client's kobject class is recorded and every PF currently on
 * the peer list is initialized with that class, given a fresh snapshot of
 * its QOS settings, and announced to the client through IRDMA_PROBE. PFs
 * that are already marked as initiated additionally get IRDMA_OPEN.
 *
 * @return 0 on success, ECONNREFUSED when RDMA is turned off, EBUSY when irdma
 * already registered, ENOTSUP when LAN-RDMA interface version doesn't match,
 * EINVAL on kobject interface fail.
 */
int
ice_rdma_register(struct ice_rdma_info *info)
{
	struct ice_rdma_entry *pf_entry;
	struct ice_rdma_peer *peer;
	struct ice_softc *softc;
	int error;

	sx_xlock(&ice_rdma.mtx);

	if (!ice_enable_irdma) {
		log(LOG_INFO, "%s: The iRDMA driver interface has been disabled\n", __func__);
		error = ECONNREFUSED;
		goto unlock;
	}

	if (ice_rdma.registered) {
		log(LOG_WARNING, "%s: iRDMA driver already registered\n", __func__);
		error = EBUSY;
		goto unlock;
	}

	/* Refuse clients speaking an incompatible interface version */
	error = ice_rdma_check_version(info);
	if (error != 0)
		goto unlock;

	log(LOG_INFO, "%s: iRDMA driver registered using version %d.%d.%d\n",
	    __func__, info->major_version, info->minor_version, info->patch_version);

	ice_rdma.peer_class = info->rdma_class;

	/* Bring every PF which attached earlier up to date with the client */
	LIST_FOREACH(pf_entry, &ice_rdma.peers, node) {
		peer = &pf_entry->peer;
		softc = __containerof(pf_entry, struct ice_softc, rdma_entry);

		kobj_init((kobj_t)peer, ice_rdma.peer_class);

		/* Refresh the DCB/QOS snapshot stored in the peer */
		memset(&peer->initial_qos_info, 0, sizeof(peer->initial_qos_info));
		ice_rdma_cp_qos_info(&softc->hw,
		    &softc->hw.port_info->qos_cfg.local_dcbx_cfg,
		    &peer->initial_qos_info);

		IRDMA_PROBE(peer);
		if (pf_entry->initiated)
			IRDMA_OPEN(peer);
	}
	ice_rdma.registered = true;

unlock:
	sx_xunlock(&ice_rdma.mtx);

	return (error);
}
545 
546 /**
547  * ice_rdma_unregister - Unregister an RDMA client driver
548  *
549  * Called by the RDMA client driver on unload. Used to de-initialize the RDMA
550  * client driver interface and shut down communication between the ice driver
551  * and the RDMA client driver.
552  * @return 0 on success, ENOENT when irdma driver wasn't registered
553  */
554 int
555 ice_rdma_unregister(void)
556 {
557 	struct ice_rdma_entry *entry;
558 
559 	sx_xlock(&ice_rdma.mtx);
560 
561 	if (!ice_rdma.registered) {
562 		log(LOG_WARNING, "%s: iRDMA driver was not previously registered\n",
563 		       __func__);
564 		sx_xunlock(&ice_rdma.mtx);
565 		return (ENOENT);
566 	}
567 
568 	log(LOG_INFO, "%s: iRDMA driver unregistered\n", __func__);
569 	ice_rdma.registered = false;
570 	ice_rdma.peer_class = NULL;
571 
572 	/*
573 	 * Release the kobject interface for each of the existing PF
574 	 * interfaces. Note that we do not notify the client about removing
575 	 * each PF, as it is assumed that the client will have already cleaned
576 	 * up any associated resources when it is unregistered.
577 	 */
578 	LIST_FOREACH(entry, &ice_rdma.peers, node)
579 		kobj_delete((kobj_t)&entry->peer, NULL);
580 
581 	sx_xunlock(&ice_rdma.mtx);
582 
583 	return (0);
584 }
585 
586 /**
587  * ice_rdma_init - RDMA driver init routine
588  *
589  * Called during ice driver module initialization to setup the RDMA client
590  * interface mutex and RDMA peer structure list.
591  */
592 void
593 ice_rdma_init(void)
594 {
595 	LIST_INIT(&ice_rdma.peers);
596 	sx_init_flags(&ice_rdma.mtx, "ice rdma interface", SX_DUPOK);
597 
598 	ice_rdma.registered = false;
599 	ice_rdma.peer_class = NULL;
600 }
601 
602 /**
603  * ice_rdma_exit - RDMA driver exit routine
604  *
605  * Called during ice driver module exit to shutdown the RDMA client interface
606  * mutex.
607  */
608 void
609 ice_rdma_exit(void)
610 {
611 	MPASS(LIST_EMPTY(&ice_rdma.peers));
612 	sx_destroy(&ice_rdma.mtx);
613 }
614 
615 /**
616  * ice_rdma_pf_attach_locked - Prepare a PF for RDMA connections
617  * @sc: the ice driver softc
618  *
619  * Initialize a peer entry for this PF and add it to the RDMA interface list.
620  * Notify the client RDMA driver of a new PF device.
621  *
622  * @pre must be called while holding the ice_rdma mutex.
623  * @return 0 on success and when RDMA feature is not available, EEXIST when
624  * irdma is already attached
625  */
626 static int
627 ice_rdma_pf_attach_locked(struct ice_softc *sc)
628 {
629 	struct ice_rdma_entry *entry;
630 
631 	/* Do not attach the PF unless RDMA is supported */
632 	if (!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA))
633 		return (0);
634 
635 	entry = &sc->rdma_entry;
636 	if (entry->attached) {
637 		device_printf(sc->dev, "iRDMA peer entry already exists\n");
638 		return (EEXIST);
639 	}
640 
641 	entry->attached = true;
642 	entry->peer.dev = sc->dev;
643 	entry->peer.ifp = sc->ifp;
644 	entry->peer.pf_id = sc->hw.pf_id;
645 	entry->peer.pci_mem = sc->bar0.res;
646 	entry->peer.pf_vsi_num = ice_get_hw_vsi_num(&sc->hw, sc->pf_vsi.idx);
647 	if (sc->rdma_imap && sc->rdma_imap[0] != ICE_INVALID_RES_IDX &&
648 	    sc->irdma_vectors > 0) {
649 		entry->peer.msix.base = sc->rdma_imap[0];
650 		entry->peer.msix.count = sc->irdma_vectors;
651 	}
652 
653 	/* Gather DCB/QOS info into peer */
654 	memset(&entry->peer.initial_qos_info, 0, sizeof(entry->peer.initial_qos_info));
655 	ice_rdma_cp_qos_info(&sc->hw, &sc->hw.port_info->qos_cfg.local_dcbx_cfg,
656 			     &entry->peer.initial_qos_info);
657 
658 	/*
659 	 * If the RDMA client driver has already registered, initialize the
660 	 * kobject and notify the client of a new PF
661 	 */
662 	if (ice_rdma.registered) {
663 		kobj_init((kobj_t)&entry->peer, ice_rdma.peer_class);
664 		IRDMA_PROBE(&entry->peer);
665 	}
666 
667 	LIST_INSERT_HEAD(&ice_rdma.peers, entry, node);
668 
669 	ice_set_bit(ICE_FEATURE_RDMA, sc->feat_en);
670 
671 	return (0);
672 }
673 
674 /**
675  * ice_rdma_pf_attach - Notify the RDMA client of a new PF
676  * @sc: the ice driver softc
677  *
678  * Called during PF attach to notify the RDMA client of a new PF.
679  * @return 0 or EEXIST if irdma was already attached
680  */
681 int
682 ice_rdma_pf_attach(struct ice_softc *sc)
683 {
684 	int err;
685 
686 	sx_xlock(&ice_rdma.mtx);
687 	err = ice_rdma_pf_attach_locked(sc);
688 	sx_xunlock(&ice_rdma.mtx);
689 
690 	return (err);
691 }
692 
693 /**
694  * ice_rdma_pf_detach_locked - Notify the RDMA client on PF detach
695  * @sc: the ice driver softc
696  *
697  * Notify the RDMA peer client driver of removal of a PF, and release any
698  * RDMA-specific resources associated with that PF. Remove the PF from the
699  * list of available RDMA entries.
700  *
701  * @pre must be called while holding the ice_rdma mutex.
702  */
703 static void
704 ice_rdma_pf_detach_locked(struct ice_softc *sc)
705 {
706 	struct ice_rdma_entry *entry;
707 
708 	/* No need to detach the PF if RDMA is not enabled */
709 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RDMA))
710 		return;
711 
712 	entry = &sc->rdma_entry;
713 	if (!entry->attached) {
714 		device_printf(sc->dev, "iRDMA peer entry was not attached\n");
715 		return;
716 	}
717 
718 	/*
719 	 * If the RDMA client driver is registered, notify the client that
720 	 * a PF has been removed, and release the kobject reference.
721 	 */
722 	if (ice_rdma.registered) {
723 		IRDMA_REMOVE(&entry->peer);
724 		kobj_delete((kobj_t)&entry->peer, NULL);
725 	}
726 
727 	LIST_REMOVE(entry, node);
728 	entry->attached = false;
729 
730 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_en);
731 }
732 
733 /**
734  * ice_rdma_pf_detach - Notify the RDMA client of a PF detaching
735  * @sc: the ice driver softc
736  *
737  * Take the ice_rdma mutex and then notify the RDMA client that a PF has been
738  * removed.
739  */
740 void
741 ice_rdma_pf_detach(struct ice_softc *sc)
742 {
743 	sx_xlock(&ice_rdma.mtx);
744 	ice_rdma_pf_detach_locked(sc);
745 	sx_xunlock(&ice_rdma.mtx);
746 }
747 
748 /**
749  * ice_rdma_pf_init - Notify the RDMA client that a PF has initialized
750  * @sc: the ice driver softc
751  *
752  * Called by the ice driver when a PF has been initialized. Notifies the RDMA
753  * client that a PF is up and ready to operate.
754  * @return 0 on success, propagates IRDMA_OPEN return value
755  */
756 int
757 ice_rdma_pf_init(struct ice_softc *sc)
758 {
759 	struct ice_rdma_peer *peer = &sc->rdma_entry.peer;
760 
761 	sx_xlock(&ice_rdma.mtx);
762 
763 	/* Update the MTU */
764 	peer->mtu = if_getmtu(sc->ifp);
765 	sc->rdma_entry.initiated = true;
766 
767 	if (sc->rdma_entry.attached && ice_rdma.registered) {
768 		sx_xunlock(&ice_rdma.mtx);
769 		return IRDMA_OPEN(peer);
770 	}
771 
772 	sx_xunlock(&ice_rdma.mtx);
773 
774 	return (0);
775 }
776 
777 /**
778  * ice_rdma_pf_stop - Notify the RDMA client of a stopped PF device
779  * @sc: the ice driver softc
780  *
781  * Called by the ice driver when a PF is stopped. Notifies the RDMA client
782  * driver that the PF has stopped and is not ready to operate.
783  * @return 0 on success
784  */
785 int
786 ice_rdma_pf_stop(struct ice_softc *sc)
787 {
788 	sx_xlock(&ice_rdma.mtx);
789 
790 	sc->rdma_entry.initiated = false;
791 	if (sc->rdma_entry.attached && ice_rdma.registered) {
792 		sx_xunlock(&ice_rdma.mtx);
793 		return IRDMA_CLOSE(&sc->rdma_entry.peer);
794 	}
795 
796 	sx_xunlock(&ice_rdma.mtx);
797 
798 	return (0);
799 }
800 
801 /**
802  * ice_rdma_link_change - Notify RDMA client of a change in link status
803  * @sc: the ice driver softc
804  * @linkstate: the link status
805  * @baudrate: the link rate in bits per second
806  *
807  * Notify the RDMA client of a link status change, by sending it the new link
808  * state and baudrate.
809  *
810  * The link state is represented the same was as in the ifnet structure. It
811  * should be LINK_STATE_UNKNOWN, LINK_STATE_DOWN, or LINK_STATE_UP.
812  */
813 void
814 ice_rdma_link_change(struct ice_softc *sc, int linkstate, uint64_t baudrate)
815 {
816 	struct ice_rdma_peer *peer = &sc->rdma_entry.peer;
817 	struct ice_rdma_event event;
818 
819 	memset(&event, 0, sizeof(struct ice_rdma_event));
820 	event.type = ICE_RDMA_EVENT_LINK_CHANGE;
821 	event.linkstate = linkstate;
822 	event.baudrate = baudrate;
823 
824 	sx_xlock(&ice_rdma.mtx);
825 
826 	if (sc->rdma_entry.attached && ice_rdma.registered)
827 		IRDMA_EVENT_HANDLER(peer, &event);
828 
829 	sx_xunlock(&ice_rdma.mtx);
830 }
831 
832 /**
833  *  ice_rdma_notify_dcb_qos_change - notify RDMA driver to pause traffic
834  *  @sc: the ice driver softc
835  *
836  *  Notify the RDMA driver that QOS/DCB settings are about to change.
837  *  Once the function return, all the QPs should be suspended.
838  */
839 void
840 ice_rdma_notify_dcb_qos_change(struct ice_softc *sc)
841 {
842 	struct ice_rdma_peer *peer = &sc->rdma_entry.peer;
843 	struct ice_rdma_event event;
844 
845 	memset(&event, 0, sizeof(struct ice_rdma_event));
846 	event.type = ICE_RDMA_EVENT_TC_CHANGE;
847 	/* pre-event */
848 	event.prep = true;
849 
850 	sx_xlock(&ice_rdma.mtx);
851 	if (sc->rdma_entry.attached && ice_rdma.registered)
852 		IRDMA_EVENT_HANDLER(peer, &event);
853 	sx_xunlock(&ice_rdma.mtx);
854 }
855 
856 /**
857  *  ice_rdma_dcb_qos_update - pass the changed dcb settings to RDMA driver
858  *  @sc: the ice driver softc
859  *  @pi: the port info structure
860  *
861  *  Pass the changed DCB settings to RDMA traffic. This function should be
862  *  called only after ice_rdma_notify_dcb_qos_change has been called and
863  *  returned before. After the function returns, all the RDMA traffic
864  *  should be resumed.
865  */
866 void
867 ice_rdma_dcb_qos_update(struct ice_softc *sc, struct ice_port_info *pi)
868 {
869 	struct ice_rdma_peer *peer = &sc->rdma_entry.peer;
870 	struct ice_rdma_event event;
871 
872 	memset(&event, 0, sizeof(struct ice_rdma_event));
873 	event.type = ICE_RDMA_EVENT_TC_CHANGE;
874 	/* post-event */
875 	event.prep = false;
876 
877 	/* gather current configuration */
878 	ice_rdma_cp_qos_info(&sc->hw, &pi->qos_cfg.local_dcbx_cfg, &event.port_qos);
879 	sx_xlock(&ice_rdma.mtx);
880 	if (sc->rdma_entry.attached && ice_rdma.registered)
881 		IRDMA_EVENT_HANDLER(peer, &event);
882 	sx_xunlock(&ice_rdma.mtx);
883 }
884 
885 /**
886  *  ice_rdma_notify_pe_intr - notify irdma on incoming interrupts regarding PE
887  *  @sc: the ice driver softc
888  *  @oicr: interrupt cause
889  *
890  *  Pass the information about received interrupt to RDMA driver if it was
891  *  relating to PE. Specifically PE_CRITERR and HMC_ERR.
892  *  The irdma driver shall decide what should be done upon these interrupts.
893  */
894 void
895 ice_rdma_notify_pe_intr(struct ice_softc *sc, uint32_t oicr)
896 {
897 	struct ice_rdma_peer *peer = &sc->rdma_entry.peer;
898 	struct ice_rdma_event event;
899 
900 	memset(&event, 0, sizeof(struct ice_rdma_event));
901 	event.type = ICE_RDMA_EVENT_CRIT_ERR;
902 	event.oicr_reg = oicr;
903 
904 	sx_xlock(&ice_rdma.mtx);
905 	if (sc->rdma_entry.attached && ice_rdma.registered)
906 		IRDMA_EVENT_HANDLER(peer, &event);
907 	sx_xunlock(&ice_rdma.mtx);
908 }
909 
910 /**
911  *  ice_rdma_notify_reset - notify irdma on incoming pf-reset
912  *  @sc: the ice driver softc
913  *
914  *  Inform irdma driver of an incoming PF reset.
915  *  The irdma driver shall set its state to reset, and avoid using CQP
916  *  anymore. Next step should be to call ice_rdma_pf_stop in order to
917  *  remove resources.
918  */
919 void
920 ice_rdma_notify_reset(struct ice_softc *sc)
921 {
922 	struct ice_rdma_peer *peer = &sc->rdma_entry.peer;
923 	struct ice_rdma_event event;
924 
925 	memset(&event, 0, sizeof(struct ice_rdma_event));
926 	event.type = ICE_RDMA_EVENT_RESET;
927 
928 	sx_xlock(&ice_rdma.mtx);
929 	if (sc->rdma_entry.attached && ice_rdma.registered)
930 	        IRDMA_EVENT_HANDLER(peer, &event);
931 	sx_xunlock(&ice_rdma.mtx);
932 }
933