xref: /freebsd/sys/dev/ice/if_ice_iflib.c (revision 4d3fc8b0570b29fb0d6ee9525f104d52176ff0d4)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2022, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 /*$FreeBSD$*/
32 
33 /**
34  * @file if_ice_iflib.c
35  * @brief iflib driver implementation
36  *
37  * Contains the main entry point for the iflib driver implementation. It
38  * implements the various ifdi driver methods, and sets up the module and
39  * driver values to load an iflib driver.
40  */
41 
42 #include "ice_iflib.h"
43 #include "ice_drv_info.h"
44 #include "ice_switch.h"
45 #include "ice_sched.h"
46 
47 #include <sys/module.h>
48 #include <sys/sockio.h>
49 #include <sys/smp.h>
50 #include <dev/pci/pcivar.h>
51 #include <dev/pci/pcireg.h>
52 
53 /*
54  * Device method prototypes
55  */
56 
57 static void *ice_register(device_t);
58 static int  ice_if_attach_pre(if_ctx_t);
59 static int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
60 static int  ice_if_attach_post(if_ctx_t);
61 static void ice_attach_post_recovery_mode(struct ice_softc *sc);
62 static int  ice_if_detach(if_ctx_t);
63 static int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
64 static int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets);
65 static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
66 static void ice_if_queues_free(if_ctx_t ctx);
67 static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
68 static void ice_if_intr_enable(if_ctx_t ctx);
69 static void ice_if_intr_disable(if_ctx_t ctx);
70 static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
71 static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
72 static int ice_if_promisc_set(if_ctx_t ctx, int flags);
73 static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
74 static int ice_if_media_change(if_ctx_t ctx);
75 static void ice_if_init(if_ctx_t ctx);
76 static void ice_if_timer(if_ctx_t ctx, uint16_t qid);
77 static void ice_if_update_admin_status(if_ctx_t ctx);
78 static void ice_if_multi_set(if_ctx_t ctx);
79 static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
80 static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
81 static void ice_if_stop(if_ctx_t ctx);
82 static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
83 static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
84 static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
85 static int ice_if_suspend(if_ctx_t ctx);
86 static int ice_if_resume(if_ctx_t ctx);
87 
88 static int ice_msix_que(void *arg);
89 static int ice_msix_admin(void *arg);
90 
91 /*
92  * Helper function prototypes
93  */
94 static int ice_pci_mapping(struct ice_softc *sc);
95 static void ice_free_pci_mapping(struct ice_softc *sc);
96 static void ice_update_link_status(struct ice_softc *sc, bool update_media);
97 static void ice_init_device_features(struct ice_softc *sc);
98 static void ice_init_tx_tracking(struct ice_vsi *vsi);
99 static void ice_handle_reset_event(struct ice_softc *sc);
100 static void ice_handle_pf_reset_request(struct ice_softc *sc);
101 static void ice_prepare_for_reset(struct ice_softc *sc);
102 static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
103 static void ice_rebuild(struct ice_softc *sc);
104 static void ice_rebuild_recovery_mode(struct ice_softc *sc);
105 static void ice_free_irqvs(struct ice_softc *sc);
106 static void ice_update_rx_mbuf_sz(struct ice_softc *sc);
107 static void ice_poll_for_media_avail(struct ice_softc *sc);
108 static void ice_setup_scctx(struct ice_softc *sc);
109 static int ice_allocate_msix(struct ice_softc *sc);
110 static void ice_admin_timer(void *arg);
111 static void ice_transition_recovery_mode(struct ice_softc *sc);
112 static void ice_transition_safe_mode(struct ice_softc *sc);
113 
114 /*
115  * Device Interface Declaration
116  */
117 
118 /**
119  * @var ice_methods
120  * @brief ice driver method entry points
121  *
122  * List of device methods implementing the generic device interface used by
123  * the device stack to interact with the ice driver. Since this is an iflib
124  * driver, most of the methods point to the generic iflib implementation.
125  */
126 static device_method_t ice_methods[] = {
127 	/* Device interface */
128 	DEVMETHOD(device_register, ice_register),
129 	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
130 	DEVMETHOD(device_attach,   iflib_device_attach),
131 	DEVMETHOD(device_detach,   iflib_device_detach),
132 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
133 	DEVMETHOD(device_suspend,  iflib_device_suspend),
134 	DEVMETHOD(device_resume,   iflib_device_resume),
135 	DEVMETHOD_END
136 };
137 
138 /**
139  * @var ice_iflib_methods
140  * @brief iflib method entry points
141  *
142  * List of device methods used by the iflib stack to interact with this
143  * driver. These are the real main entry points used to interact with this
144  * driver.
145  */
146 static device_method_t ice_iflib_methods[] = {
147 	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
148 	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
149 	DEVMETHOD(ifdi_detach, ice_if_detach),
150 	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
151 	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
152 	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
153 	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
154 	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
155 	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
156 	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
157 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
158 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
159 	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
160 	DEVMETHOD(ifdi_media_status, ice_if_media_status),
161 	DEVMETHOD(ifdi_media_change, ice_if_media_change),
162 	DEVMETHOD(ifdi_init, ice_if_init),
163 	DEVMETHOD(ifdi_stop, ice_if_stop),
164 	DEVMETHOD(ifdi_timer, ice_if_timer),
165 	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
166 	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
167 	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
168 	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
169 	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
170 	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
171 	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
172 	DEVMETHOD(ifdi_suspend, ice_if_suspend),
173 	DEVMETHOD(ifdi_resume, ice_if_resume),
174 	DEVMETHOD_END
175 };
176 
177 /**
178  * @var ice_driver
179  * @brief driver structure for the generic device stack
180  *
181  * driver_t definition used to setup the generic device methods.
182  */
183 static driver_t ice_driver = {
184 	.name = "ice",
185 	.methods = ice_methods,
186 	.size = sizeof(struct ice_softc),
187 };
188 
189 /**
190  * @var ice_iflib_driver
191  * @brief driver structure for the iflib stack
192  *
193  * driver_t definition used to setup the iflib device methods.
194  */
195 static driver_t ice_iflib_driver = {
196 	.name = "ice",
197 	.methods = ice_iflib_methods,
198 	.size = sizeof(struct ice_softc),
199 };
200 
/*
 * Tx/Rx operation tables defined outside of this file. ice_setup_scctx()
 * installs ice_recovery_txrx when the device is in recovery mode and
 * ice_txrx otherwise.
 */
extern struct if_txrx ice_txrx;
extern struct if_txrx ice_recovery_txrx;
203 
204 /**
205  * @var ice_sctx
206  * @brief ice driver shared context
207  *
208  * Structure defining shared values (context) that is used by all instances of
209  * the device. Primarily used to setup details about how the iflib stack
210  * should treat this driver. Also defines the default, minimum, and maximum
211  * number of descriptors in each ring.
212  */
213 static struct if_shared_ctx ice_sctx = {
214 	.isc_magic = IFLIB_MAGIC,
215 	.isc_q_align = PAGE_SIZE,
216 
217 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
218 	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
219 	 * that doesn't make sense since that would be larger than the maximum
220 	 * size of a single packet.
221 	 */
222 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
223 
224 	/* XXX: This is only used by iflib to ensure that
225 	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
226 	 */
227 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
228 	/* XXX: This is used by iflib to set the number of segments in the TSO
229 	 * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the
230 	 * related ifnet parameter.
231 	 */
232 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
233 
234 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
235 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
236 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
237 
238 	.isc_nfl = 1,
239 	.isc_ntxqs = 1,
240 	.isc_nrxqs = 1,
241 
242 	.isc_admin_intrcnt = 1,
243 	.isc_vendor_info = ice_vendor_info_array,
244 	.isc_driver_version = __DECONST(char *, ice_driver_version),
245 	.isc_driver = &ice_iflib_driver,
246 
247 	/*
248 	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
249 	 * for hardware checksum offload
250 	 *
251 	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
252 	 * IP sum field, required by our hardware to calculate valid TSO
253 	 * checksums.
254 	 *
255 	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
256 	 * even when the interface is down.
257 	 *
258 	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
259 	 * vectors manually instead of relying on iflib code to do this.
260 	 */
261 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
262 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,
263 
264 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
265 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
266 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
267 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
268 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
269 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
270 };
271 
272 DRIVER_MODULE(ice, pci, ice_driver, ice_module_event_handler, NULL);
273 
274 MODULE_VERSION(ice, 1);
275 MODULE_DEPEND(ice, pci, 1, 1, 1);
276 MODULE_DEPEND(ice, ether, 1, 1, 1);
277 MODULE_DEPEND(ice, iflib, 1, 1, 1);
278 
279 IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);
280 
281 /* Static driver-wide sysctls */
282 #include "ice_iflib_sysctls.h"
283 
284 /**
285  * ice_pci_mapping - Map PCI BAR memory
286  * @sc: device private softc
287  *
288  * Map PCI BAR 0 for device operation.
289  */
290 static int
291 ice_pci_mapping(struct ice_softc *sc)
292 {
293 	int rc;
294 
295 	/* Map BAR0 */
296 	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
297 	if (rc)
298 		return rc;
299 
300 	return 0;
301 }
302 
303 /**
304  * ice_free_pci_mapping - Release PCI BAR memory
305  * @sc: device private softc
306  *
307  * Release PCI BARs which were previously mapped by ice_pci_mapping().
308  */
309 static void
310 ice_free_pci_mapping(struct ice_softc *sc)
311 {
312 	/* Free BAR0 */
313 	ice_free_bar(sc->dev, &sc->bar0);
314 }
315 
316 /*
317  * Device methods
318  */
319 
320 /**
321  * ice_register - register device method callback
322  * @dev: the device being registered
323  *
324  * Returns a pointer to the shared context structure, which is used by iflib.
325  */
326 static void *
327 ice_register(device_t dev __unused)
328 {
329 	return &ice_sctx;
330 } /* ice_register */
331 
332 /**
333  * ice_setup_scctx - Setup the iflib softc context structure
334  * @sc: the device private structure
335  *
336  * Setup the parameters in if_softc_ctx_t structure used by the iflib stack
337  * when loading.
338  */
339 static void
340 ice_setup_scctx(struct ice_softc *sc)
341 {
342 	if_softc_ctx_t scctx = sc->scctx;
343 	struct ice_hw *hw = &sc->hw;
344 	bool safe_mode, recovery_mode;
345 
346 	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
347 	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);
348 
349 	/*
350 	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
351 	 * a single queue pair.
352 	 */
353 	if (safe_mode || recovery_mode) {
354 		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
355 		scctx->isc_ntxqsets_max = 1;
356 		scctx->isc_nrxqsets_max = 1;
357 	} else {
358 		/*
359 		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
360 		 * the values of the override sysctls. Cache these initial
361 		 * values so that the driver can be aware of what the iflib
362 		 * sysctl value is when setting up MSI-X vectors.
363 		 */
364 		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
365 		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;
366 
367 		if (scctx->isc_ntxqsets == 0)
368 			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
369 		if (scctx->isc_nrxqsets == 0)
370 			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;
371 
372 		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
373 		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;
374 
375 		/*
376 		 * Sanity check that the iflib sysctl values are within the
377 		 * maximum supported range.
378 		 */
379 		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
380 			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
381 		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
382 			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
383 	}
384 
385 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
386 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
387 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
388 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
389 
390 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
391 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
392 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
393 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
394 
395 	scctx->isc_msix_bar = PCIR_BAR(ICE_MSIX_BAR);
396 	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;
397 
398 	/*
399 	 * If the driver loads in recovery mode, disable Tx/Rx functionality
400 	 */
401 	if (recovery_mode)
402 		scctx->isc_txrx = &ice_recovery_txrx;
403 	else
404 		scctx->isc_txrx = &ice_txrx;
405 
406 	/*
407 	 * If the driver loads in Safe mode or Recovery mode, disable
408 	 * advanced features including hardware offloads.
409 	 */
410 	if (safe_mode || recovery_mode) {
411 		scctx->isc_capenable = ICE_SAFE_CAPS;
412 		scctx->isc_tx_csum_flags = 0;
413 	} else {
414 		scctx->isc_capenable = ICE_FULL_CAPS;
415 		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
416 	}
417 
418 	scctx->isc_capabilities = scctx->isc_capenable;
419 } /* ice_setup_scctx */
420 
421 /**
422  * ice_if_attach_pre - Early device attach logic
423  * @ctx: the iflib context structure
424  *
425  * Called by iflib during the attach process. Earliest main driver entry
426  * point which performs necessary hardware and driver initialization. Called
427  * before the Tx and Rx queues are allocated.
428  */
429 static int
430 ice_if_attach_pre(if_ctx_t ctx)
431 {
432 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
433 	enum ice_fw_modes fw_mode;
434 	enum ice_status status;
435 	if_softc_ctx_t scctx;
436 	struct ice_hw *hw;
437 	device_t dev;
438 	int err;
439 
440 	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");
441 
442 	ice_set_state(&sc->state, ICE_STATE_ATTACHING);
443 
444 	sc->ctx = ctx;
445 	sc->media = iflib_get_media(ctx);
446 	sc->sctx = iflib_get_sctx(ctx);
447 	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);
448 
449 	dev = sc->dev = iflib_get_dev(ctx);
450 	scctx = sc->scctx = iflib_get_softc_ctx(ctx);
451 
452 	hw = &sc->hw;
453 	hw->back = sc;
454 
455 	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
456 		 "%s:admin", device_get_nameunit(dev));
457 	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
458 	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);
459 
460 	ASSERT_CTX_LOCKED(sc);
461 
462 	if (ice_pci_mapping(sc)) {
463 		err = (ENXIO);
464 		goto destroy_admin_timer;
465 	}
466 
467 	/* Save off the PCI information */
468 	ice_save_pci_info(hw, dev);
469 
470 	/* create tunables as early as possible */
471 	ice_add_device_tunables(sc);
472 
473 	/* Setup ControlQ lengths */
474 	ice_set_ctrlq_len(hw);
475 
476 reinit_hw:
477 
478 	fw_mode = ice_get_fw_mode(hw);
479 	if (fw_mode == ICE_FW_MODE_REC) {
480 		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
481 
482 		err = ice_attach_pre_recovery_mode(sc);
483 		if (err)
484 			goto free_pci_mapping;
485 
486 		return (0);
487 	}
488 
489 	/* Initialize the hw data structure */
490 	status = ice_init_hw(hw);
491 	if (status) {
492 		if (status == ICE_ERR_FW_API_VER) {
493 			/* Enter recovery mode, so that the driver remains
494 			 * loaded. This way, if the system administrator
495 			 * cannot update the driver, they may still attempt to
496 			 * downgrade the NVM.
497 			 */
498 			err = ice_attach_pre_recovery_mode(sc);
499 			if (err)
500 				goto free_pci_mapping;
501 
502 			return (0);
503 		} else {
504 			err = EIO;
505 			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
506 				      ice_status_str(status),
507 				      ice_aq_str(hw->adminq.sq_last_status));
508 		}
509 		goto free_pci_mapping;
510 	}
511 
512 	ice_init_device_features(sc);
513 
514 	/* Notify firmware of the device driver version */
515 	err = ice_send_version(sc);
516 	if (err)
517 		goto deinit_hw;
518 
519 	/*
520 	 * Success indicates a change was made that requires a reinitialization
521 	 * of the hardware
522 	 */
523 	err = ice_load_pkg_file(sc);
524 	if (err == ICE_SUCCESS) {
525 		ice_deinit_hw(hw);
526 		goto reinit_hw;
527 	}
528 
529 	err = ice_init_link_events(sc);
530 	if (err) {
531 		device_printf(dev, "ice_init_link_events failed: %s\n",
532 			      ice_err_str(err));
533 		goto deinit_hw;
534 	}
535 
536 	/* Initialize VLAN mode in FW; if dual VLAN mode is supported by the package
537 	 * and firmware, this will force them to use single VLAN mode.
538 	 */
539 	status = ice_set_vlan_mode(hw);
540 	if (status) {
541 		err = EIO;
542 		device_printf(dev, "Unable to initialize VLAN mode, err %s aq_err %s\n",
543 			      ice_status_str(status),
544 			      ice_aq_str(hw->adminq.sq_last_status));
545 		goto deinit_hw;
546 	}
547 
548 	ice_print_nvm_version(sc);
549 
550 	/* Setup the MAC address */
551 	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);
552 
553 	/* Setup the iflib softc context structure */
554 	ice_setup_scctx(sc);
555 
556 	/* Initialize the Tx queue manager */
557 	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
558 	if (err) {
559 		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
560 			      ice_err_str(err));
561 		goto deinit_hw;
562 	}
563 
564 	/* Initialize the Rx queue manager */
565 	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
566 	if (err) {
567 		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
568 			      ice_err_str(err));
569 		goto free_tx_qmgr;
570 	}
571 
572 	/* Initialize the interrupt resource manager */
573 	err = ice_alloc_intr_tracking(sc);
574 	if (err)
575 		/* Errors are already printed */
576 		goto free_rx_qmgr;
577 
578 	/* Determine maximum number of VSIs we'll prepare for */
579 	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
580 				    hw->func_caps.guar_num_vsi);
581 
582 	if (!sc->num_available_vsi) {
583 		err = EIO;
584 		device_printf(dev, "No VSIs allocated to host\n");
585 		goto free_intr_tracking;
586 	}
587 
588 	/* Allocate storage for the VSI pointers */
589 	sc->all_vsi = (struct ice_vsi **)
590 		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
591 		       M_ICE, M_WAITOK | M_ZERO);
592 	if (!sc->all_vsi) {
593 		err = ENOMEM;
594 		device_printf(dev, "Unable to allocate VSI array\n");
595 		goto free_intr_tracking;
596 	}
597 
598 	/*
599 	 * Prepare the statically allocated primary PF VSI in the softc
600 	 * structure. Other VSIs will be dynamically allocated as needed.
601 	 */
602 	ice_setup_pf_vsi(sc);
603 
604 	err = ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
605 	    scctx->isc_nrxqsets_max);
606 	if (err) {
607 		device_printf(dev, "Unable to allocate VSI Queue maps\n");
608 		goto free_main_vsi;
609 	}
610 
611 	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
612 	err = ice_allocate_msix(sc);
613 	if (err)
614 		goto free_main_vsi;
615 
616 	return 0;
617 
618 free_main_vsi:
619 	/* ice_release_vsi will free the queue maps if they were allocated */
620 	ice_release_vsi(&sc->pf_vsi);
621 	free(sc->all_vsi, M_ICE);
622 	sc->all_vsi = NULL;
623 free_intr_tracking:
624 	ice_free_intr_tracking(sc);
625 free_rx_qmgr:
626 	ice_resmgr_destroy(&sc->rx_qmgr);
627 free_tx_qmgr:
628 	ice_resmgr_destroy(&sc->tx_qmgr);
629 deinit_hw:
630 	ice_deinit_hw(hw);
631 free_pci_mapping:
632 	ice_free_pci_mapping(sc);
633 destroy_admin_timer:
634 	mtx_lock(&sc->admin_mtx);
635 	callout_stop(&sc->admin_timer);
636 	mtx_unlock(&sc->admin_mtx);
637 	mtx_destroy(&sc->admin_mtx);
638 	return err;
639 } /* ice_if_attach_pre */
640 
641 /**
642  * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
643  * @sc: the device private softc
644  *
645  * Loads the device driver in limited Firmware Recovery mode, intended to
646  * allow users to update the firmware to attempt to recover the device.
647  *
648  * @remark We may enter recovery mode in case either (a) the firmware is
649  * detected to be in an invalid state and must be re-programmed, or (b) the
650  * driver detects that the loaded firmware has a non-compatible API version
651  * that the driver cannot operate with.
652  */
653 static int
654 ice_attach_pre_recovery_mode(struct ice_softc *sc)
655 {
656 	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);
657 
658 	/* Setup the iflib softc context */
659 	ice_setup_scctx(sc);
660 
661 	/* Setup the PF VSI back pointer */
662 	sc->pf_vsi.sc = sc;
663 
664 	/*
665 	 * We still need to allocate MSI-X vectors since we need one vector to
666 	 * run the administrative admin interrupt
667 	 */
668 	return ice_allocate_msix(sc);
669 }
670 
671 /**
672  * ice_update_link_status - notify OS of link state change
673  * @sc: device private softc structure
674  * @update_media: true if we should update media even if link didn't change
675  *
676  * Called to notify iflib core of link status changes. Should be called once
677  * during attach_post, and whenever link status changes during runtime.
678  *
679  * This call only updates the currently supported media types if the link
680  * status changed, or if update_media is set to true.
681  */
682 static void
683 ice_update_link_status(struct ice_softc *sc, bool update_media)
684 {
685 	struct ice_hw *hw = &sc->hw;
686 	enum ice_status status;
687 
688 	/* Never report link up when in recovery mode */
689 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
690 		return;
691 
692 	/* Report link status to iflib only once each time it changes */
693 	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
694 		if (sc->link_up) { /* link is up */
695 			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);
696 
697 			ice_set_default_local_lldp_mib(sc);
698 
699 			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
700 			ice_rdma_link_change(sc, LINK_STATE_UP, baudrate);
701 
702 			ice_link_up_msg(sc);
703 
704 			update_media = true;
705 		} else { /* link is down */
706 			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
707 			ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
708 
709 			update_media = true;
710 		}
711 	}
712 
713 	/* Update the supported media types */
714 	if (update_media) {
715 		status = ice_add_media_types(sc, sc->media);
716 		if (status)
717 			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
718 				      ice_status_str(status),
719 				      ice_aq_str(hw->adminq.sq_last_status));
720 	}
721 
722 	/* TODO: notify VFs of link state change */
723 }
724 
725 /**
726  * ice_if_attach_post - Late device attach logic
727  * @ctx: the iflib context structure
728  *
729  * Called by iflib to finish up attaching the device. Performs any attach
730  * logic which must wait until after the Tx and Rx queues have been
731  * allocated.
732  */
733 static int
734 ice_if_attach_post(if_ctx_t ctx)
735 {
736 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
737 	if_t ifp = iflib_get_ifp(ctx);
738 	int err;
739 
740 	ASSERT_CTX_LOCKED(sc);
741 
742 	/* We don't yet support loading if MSI-X is not supported */
743 	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
744 		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
745 		return (ENOTSUP);
746 	}
747 
748 	/* The ifnet structure hasn't yet been initialized when the attach_pre
749 	 * handler is called, so wait until attach_post to setup the
750 	 * isc_max_frame_size.
751 	 */
752 
753 	sc->ifp = ifp;
754 	sc->scctx->isc_max_frame_size = if_getmtu(ifp) +
755 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
756 
757 	/*
758 	 * If we are in recovery mode, only perform a limited subset of
759 	 * initialization to support NVM recovery.
760 	 */
761 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
762 		ice_attach_post_recovery_mode(sc);
763 		return (0);
764 	}
765 
766 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
767 
768 	err = ice_initialize_vsi(&sc->pf_vsi);
769 	if (err) {
770 		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
771 			      ice_err_str(err));
772 		return err;
773 	}
774 
775 	/* Enable FW health event reporting */
776 	ice_init_health_events(sc);
777 
778 	/* Configure the main PF VSI for RSS */
779 	err = ice_config_rss(&sc->pf_vsi);
780 	if (err) {
781 		device_printf(sc->dev,
782 			      "Unable to configure RSS for the main VSI, err %s\n",
783 			      ice_err_str(err));
784 		return err;
785 	}
786 
787 	/* Configure switch to drop transmitted LLDP and PAUSE frames */
788 	err = ice_cfg_pf_ethertype_filters(sc);
789 	if (err)
790 		return err;
791 
792 	ice_get_and_print_bus_info(sc);
793 
794 	ice_set_link_management_mode(sc);
795 
796 	ice_init_saved_phy_cfg(sc);
797 
798 	ice_cfg_pba_num(sc);
799 
800 	ice_add_device_sysctls(sc);
801 
802 	/* Get DCBX/LLDP state and start DCBX agent */
803 	ice_init_dcb_setup(sc);
804 
805 	/* Setup link configuration parameters */
806 	ice_init_link_configuration(sc);
807 	ice_update_link_status(sc, true);
808 
809 	/* Configure interrupt causes for the administrative interrupt */
810 	ice_configure_misc_interrupts(sc);
811 
812 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
813 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
814 
815 	err = ice_rdma_pf_attach(sc);
816 	if (err)
817 		return (err);
818 
819 	/* Start the admin timer */
820 	mtx_lock(&sc->admin_mtx);
821 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
822 	mtx_unlock(&sc->admin_mtx);
823 
824 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
825 
826 	return 0;
827 } /* ice_if_attach_post */
828 
829 /**
830  * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
831  * @sc: the device private softc
832  *
833  * Performs minimal work to prepare the driver to recover an NVM in case the
834  * firmware is in recovery mode.
835  */
836 static void
837 ice_attach_post_recovery_mode(struct ice_softc *sc)
838 {
839 	/* Configure interrupt causes for the administrative interrupt */
840 	ice_configure_misc_interrupts(sc);
841 
842 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
843 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
844 
845 	/* Start the admin timer */
846 	mtx_lock(&sc->admin_mtx);
847 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
848 	mtx_unlock(&sc->admin_mtx);
849 
850 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
851 }
852 
853 /**
854  * ice_free_irqvs - Free IRQ vector memory
855  * @sc: the device private softc structure
856  *
857  * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
858  */
859 static void
860 ice_free_irqvs(struct ice_softc *sc)
861 {
862 	struct ice_vsi *vsi = &sc->pf_vsi;
863 	if_ctx_t ctx = sc->ctx;
864 	int i;
865 
866 	/* If the irqvs array is NULL, then there are no vectors to free */
867 	if (sc->irqvs == NULL)
868 		return;
869 
870 	/* Free the IRQ vectors */
871 	for (i = 0; i < sc->num_irq_vectors; i++)
872 		iflib_irq_free(ctx, &sc->irqvs[i].irq);
873 
874 	/* Clear the irqv pointers */
875 	for (i = 0; i < vsi->num_rx_queues; i++)
876 		vsi->rx_queues[i].irqv = NULL;
877 
878 	for (i = 0; i < vsi->num_tx_queues; i++)
879 		vsi->tx_queues[i].irqv = NULL;
880 
881 	/* Release the vector array memory */
882 	free(sc->irqvs, M_ICE);
883 	sc->irqvs = NULL;
884 	sc->num_irq_vectors = 0;
885 }
886 
887 /**
888  * ice_if_detach - Device driver detach logic
889  * @ctx: iflib context structure
890  *
891  * Perform device shutdown logic to detach the device driver.
892  *
893  * Note that there is no guarantee of the ordering of ice_if_queues_free() and
894  * ice_if_detach(). It is possible for the functions to be called in either
895  * order, and they must not assume to have a strict ordering.
896  */
897 static int
898 ice_if_detach(if_ctx_t ctx)
899 {
900 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
901 	struct ice_vsi *vsi = &sc->pf_vsi;
902 	int i;
903 
904 	ASSERT_CTX_LOCKED(sc);
905 
906 	/* Indicate that we're detaching */
907 	ice_set_state(&sc->state, ICE_STATE_DETACHING);
908 
909 	/* Stop the admin timer */
910 	mtx_lock(&sc->admin_mtx);
911 	callout_stop(&sc->admin_timer);
912 	mtx_unlock(&sc->admin_mtx);
913 	mtx_destroy(&sc->admin_mtx);
914 
915 	ice_rdma_pf_detach(sc);
916 
917 	/* Free allocated media types */
918 	ifmedia_removeall(sc->media);
919 
920 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
921 	 * pointers. Note, the calls here and those in ice_if_queues_free()
922 	 * are *BOTH* necessary, as we cannot guarantee which path will be
923 	 * run first
924 	 */
925 	ice_vsi_del_txqs_ctx(vsi);
926 	ice_vsi_del_rxqs_ctx(vsi);
927 
928 	/* Release MSI-X resources */
929 	ice_free_irqvs(sc);
930 
931 	for (i = 0; i < sc->num_available_vsi; i++) {
932 		if (sc->all_vsi[i])
933 			ice_release_vsi(sc->all_vsi[i]);
934 	}
935 
936 	if (sc->all_vsi) {
937 		free(sc->all_vsi, M_ICE);
938 		sc->all_vsi = NULL;
939 	}
940 
941 	/* Release MSI-X memory */
942 	pci_release_msi(sc->dev);
943 
944 	if (sc->msix_table != NULL) {
945 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
946 				     rman_get_rid(sc->msix_table),
947 				     sc->msix_table);
948 		sc->msix_table = NULL;
949 	}
950 
951 	ice_free_intr_tracking(sc);
952 
953 	/* Destroy the queue managers */
954 	ice_resmgr_destroy(&sc->tx_qmgr);
955 	ice_resmgr_destroy(&sc->rx_qmgr);
956 
957 	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
958 		ice_deinit_hw(&sc->hw);
959 
960 	ice_free_pci_mapping(sc);
961 
962 	return 0;
963 } /* ice_if_detach */
964 
965 /**
966  * ice_if_tx_queues_alloc - Allocate Tx queue memory
967  * @ctx: iflib context structure
968  * @vaddrs: virtual addresses for the queue memory
969  * @paddrs: physical addresses for the queue memory
970  * @ntxqs: the number of Tx queues per set (should always be 1)
971  * @ntxqsets: the number of Tx queue sets to allocate
972  *
973  * Called by iflib to allocate Tx queues for the device. Allocates driver
974  * memory to track each queue, the status arrays used for descriptor
975  * status reporting, and Tx queue sysctls.
976  */
977 static int
978 ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
979 		       int __invariant_only ntxqs, int ntxqsets)
980 {
981 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
982 	struct ice_vsi *vsi = &sc->pf_vsi;
983 	struct ice_tx_queue *txq;
984 	int err, i, j;
985 
986 	MPASS(ntxqs == 1);
987 	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
988 	ASSERT_CTX_LOCKED(sc);
989 
990 	/* Do not bother allocating queues if we're in recovery mode */
991 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
992 		return (0);
993 
994 	/* Allocate queue structure memory */
995 	if (!(vsi->tx_queues =
996 	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
997 		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
998 		return (ENOMEM);
999 	}
1000 
1001 	/* Allocate report status arrays */
1002 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1003 		if (!(txq->tx_rsq =
1004 		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
1005 			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
1006 			err = ENOMEM;
1007 			goto free_tx_queues;
1008 		}
1009 		/* Initialize report status array */
1010 		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
1011 			txq->tx_rsq[j] = QIDX_INVALID;
1012 	}
1013 
1014 	/* Assign queues from PF space to the main VSI */
1015 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
1016 	if (err) {
1017 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1018 			      ice_err_str(err));
1019 		goto free_tx_queues;
1020 	}
1021 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1022 
1023 	/* Add Tx queue sysctls context */
1024 	ice_vsi_add_txqs_ctx(vsi);
1025 
1026 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1027 		/* q_handle == me when only one TC */
1028 		txq->me = txq->q_handle = i;
1029 		txq->vsi = vsi;
1030 
1031 		/* store the queue size for easier access */
1032 		txq->desc_count = sc->scctx->isc_ntxd[0];
1033 
1034 		/* get the virtual and physical address of the hardware queues */
1035 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
1036 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
1037 		txq->tx_paddr = paddrs[i];
1038 
1039 		ice_add_txq_sysctls(txq);
1040 	}
1041 
1042 	vsi->num_tx_queues = ntxqsets;
1043 
1044 	return (0);
1045 
1046 free_tx_queues:
1047 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1048 		if (txq->tx_rsq != NULL) {
1049 			free(txq->tx_rsq, M_ICE);
1050 			txq->tx_rsq = NULL;
1051 		}
1052 	}
1053 	free(vsi->tx_queues, M_ICE);
1054 	vsi->tx_queues = NULL;
1055 	return err;
1056 }
1057 
1058 /**
1059  * ice_if_rx_queues_alloc - Allocate Rx queue memory
1060  * @ctx: iflib context structure
1061  * @vaddrs: virtual addresses for the queue memory
1062  * @paddrs: physical addresses for the queue memory
1063  * @nrxqs: number of Rx queues per set (should always be 1)
1064  * @nrxqsets: number of Rx queue sets to allocate
1065  *
1066  * Called by iflib to allocate Rx queues for the device. Allocates driver
1067  * memory to track each queue, as well as sets up the Rx queue sysctls.
1068  */
1069 static int
1070 ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1071 		       int __invariant_only nrxqs, int nrxqsets)
1072 {
1073 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1074 	struct ice_vsi *vsi = &sc->pf_vsi;
1075 	struct ice_rx_queue *rxq;
1076 	int err, i;
1077 
1078 	MPASS(nrxqs == 1);
1079 	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
1080 	ASSERT_CTX_LOCKED(sc);
1081 
1082 	/* Do not bother allocating queues if we're in recovery mode */
1083 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1084 		return (0);
1085 
1086 	/* Allocate queue structure memory */
1087 	if (!(vsi->rx_queues =
1088 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1089 		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
1090 		return (ENOMEM);
1091 	}
1092 
1093 	/* Assign queues from PF space to the main VSI */
1094 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
1095 	if (err) {
1096 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1097 			      ice_err_str(err));
1098 		goto free_rx_queues;
1099 	}
1100 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1101 
1102 	/* Add Rx queue sysctls context */
1103 	ice_vsi_add_rxqs_ctx(vsi);
1104 
1105 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
1106 		rxq->me = i;
1107 		rxq->vsi = vsi;
1108 
1109 		/* store the queue size for easier access */
1110 		rxq->desc_count = sc->scctx->isc_nrxd[0];
1111 
1112 		/* get the virtual and physical address of the hardware queues */
1113 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
1114 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
1115 		rxq->rx_paddr = paddrs[i];
1116 
1117 		ice_add_rxq_sysctls(rxq);
1118 	}
1119 
1120 	vsi->num_rx_queues = nrxqsets;
1121 
1122 	return (0);
1123 
1124 free_rx_queues:
1125 	free(vsi->rx_queues, M_ICE);
1126 	vsi->rx_queues = NULL;
1127 	return err;
1128 }
1129 
1130 /**
1131  * ice_if_queues_free - Free queue memory
1132  * @ctx: the iflib context structure
1133  *
1134  * Free queue memory allocated by ice_if_tx_queues_alloc() and
1135  * ice_if_rx_queues_alloc().
1136  *
1137  * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
1138  * called in the same order. It's possible for ice_if_queues_free() to be
1139  * called prior to ice_if_detach(), and vice versa.
1140  *
1141  * For this reason, the main VSI is a static member of the ice_softc, which is
1142  * not free'd until after iflib finishes calling both of these functions.
1143  *
1144  * Thus, care must be taken in how we manage the memory being freed by this
1145  * function, and in what tasks it can and must perform.
1146  */
1147 static void
1148 ice_if_queues_free(if_ctx_t ctx)
1149 {
1150 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1151 	struct ice_vsi *vsi = &sc->pf_vsi;
1152 	struct ice_tx_queue *txq;
1153 	int i;
1154 
1155 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
1156 	 * pointers. Note, the calls here and those in ice_if_detach()
1157 	 * are *BOTH* necessary, as we cannot guarantee which path will be
1158 	 * run first
1159 	 */
1160 	ice_vsi_del_txqs_ctx(vsi);
1161 	ice_vsi_del_rxqs_ctx(vsi);
1162 
1163 	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
1164 	ice_free_irqvs(sc);
1165 
1166 	if (vsi->tx_queues != NULL) {
1167 		/* free the tx_rsq arrays */
1168 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1169 			if (txq->tx_rsq != NULL) {
1170 				free(txq->tx_rsq, M_ICE);
1171 				txq->tx_rsq = NULL;
1172 			}
1173 		}
1174 		free(vsi->tx_queues, M_ICE);
1175 		vsi->tx_queues = NULL;
1176 		vsi->num_tx_queues = 0;
1177 	}
1178 	if (vsi->rx_queues != NULL) {
1179 		free(vsi->rx_queues, M_ICE);
1180 		vsi->rx_queues = NULL;
1181 		vsi->num_rx_queues = 0;
1182 	}
1183 }
1184 
1185 /**
1186  * ice_msix_que - Fast interrupt handler for MSI-X receive queues
1187  * @arg: The Rx queue memory
1188  *
1189  * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
1190  * an MSI-X interrupt for a given queue is triggered. Currently this just asks
1191  * iflib to schedule the main Rx thread.
1192  */
1193 static int
1194 ice_msix_que(void *arg)
1195 {
1196 	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;
1197 
1198 	/* TODO: dynamic ITR algorithm?? */
1199 
1200 	return (FILTER_SCHEDULE_THREAD);
1201 }
1202 
1203 /**
1204  * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
1205  * @arg: pointer to device softc memory
1206  *
1207  * Called by iflib when an administrative interrupt occurs. Should perform any
1208  * fast logic for handling the interrupt cause, and then indicate whether the
1209  * admin task needs to be queued.
1210  */
1211 static int
1212 ice_msix_admin(void *arg)
1213 {
1214 	struct ice_softc *sc = (struct ice_softc *)arg;
1215 	struct ice_hw *hw = &sc->hw;
1216 	device_t dev = sc->dev;
1217 	u32 oicr;
1218 
1219 	/* There is no safe way to modify the enabled miscellaneous causes of
1220 	 * the OICR vector at runtime, as doing so would be prone to race
1221 	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
1222 	 * causes and allow future interrupts to occur. The admin interrupt
1223 	 * vector will not be re-enabled until after we exit this function,
1224 	 * but any delayed tasks must be resilient against possible "late
1225 	 * arrival" interrupts that occur while we're already handling the
1226 	 * task. This is done by using state bits and serializing these
1227 	 * delayed tasks via the admin status task function.
1228 	 */
1229 	oicr = rd32(hw, PFINT_OICR);
1230 
1231 	/* Processing multiple controlq interrupts on a single vector does not
1232 	 * provide an indication of which controlq triggered the interrupt.
1233 	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
1234 	 * registers. However, the INTEVENT bit is not guaranteed to be set as
1235 	 * it gets automatically cleared when the hardware acknowledges the
1236 	 * interrupt.
1237 	 *
1238 	 * This means we don't really have a good indication of whether or
1239 	 * which controlq triggered this interrupt. We'll just notify the
1240 	 * admin task that it should check all the controlqs.
1241 	 */
1242 	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
1243 
1244 	if (oicr & PFINT_OICR_VFLR_M) {
1245 		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
1246 	}
1247 
1248 	if (oicr & PFINT_OICR_MAL_DETECT_M) {
1249 		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
1250 	}
1251 
1252 	if (oicr & PFINT_OICR_GRST_M) {
1253 		u32 reset;
1254 
1255 		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
1256 			GLGEN_RSTAT_RESET_TYPE_S;
1257 
1258 		if (reset == ICE_RESET_CORER)
1259 			sc->soft_stats.corer_count++;
1260 		else if (reset == ICE_RESET_GLOBR)
1261 			sc->soft_stats.globr_count++;
1262 		else
1263 			sc->soft_stats.empr_count++;
1264 
1265 		/* There are a couple of bits at play for handling resets.
1266 		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
1267 		 * indicate that the driver has received an OICR with a reset
1268 		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
1269 		 * happen. Second, we set hw->reset_ongoing to indicate that
1270 		 * the hardware is in reset. We will set this back to false as
1271 		 * soon as the driver has determined that the hardware is out
1272 		 * of reset.
1273 		 *
1274 		 * If the driver wishes to trigger a request, it can set one of
1275 		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
1276 		 * correct type of reset.
1277 		 */
1278 		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
1279 			hw->reset_ongoing = true;
1280 	}
1281 
1282 	if (oicr & PFINT_OICR_ECC_ERR_M) {
1283 		device_printf(dev, "ECC Error detected!\n");
1284 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1285 	}
1286 
1287 	if (oicr & PFINT_OICR_PE_CRITERR_M) {
1288 		device_printf(dev, "Critical Protocol Engine Error detected!\n");
1289 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1290 	}
1291 
1292 	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
1293 		device_printf(dev, "PCI Exception detected!\n");
1294 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1295 	}
1296 
1297 	if (oicr & PFINT_OICR_HMC_ERR_M) {
1298 		/* Log the HMC errors, but don't disable the interrupt cause */
1299 		ice_log_hmc_error(hw, dev);
1300 	}
1301 
1302 	return (FILTER_SCHEDULE_THREAD);
1303 }
1304 
1305 /**
1306  * ice_allocate_msix - Allocate MSI-X vectors for the interface
1307  * @sc: the device private softc
1308  *
1309  * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
1310  *
1311  * First, determine a suitable total number of vectors based on the number
1312  * of CPUs, RSS buckets, the administrative vector, and other demands such as
1313  * RDMA.
1314  *
1315  * Request the desired amount of vectors, and see how many we obtain. If we
1316  * don't obtain as many as desired, reduce the demands by lowering the number
1317  * of requested queues or reducing the demand from other features such as
1318  * RDMA.
1319  *
1320  * @remark This function is required because the driver sets the
1321  * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
1322  * manually.
1323  *
1324  * @remark This driver will only use MSI-X vectors. If this is not possible,
1325  * neither MSI or legacy interrupts will be tried.
1326  *
1327  * @post on success this function must set the following scctx parameters:
1328  * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
1329  *
1330  * @returns zero on success or an error code on failure.
1331  */
1332 static int
1333 ice_allocate_msix(struct ice_softc *sc)
1334 {
1335 	bool iflib_override_queue_count = false;
1336 	if_softc_ctx_t scctx = sc->scctx;
1337 	device_t dev = sc->dev;
1338 	cpuset_t cpus;
1339 	int bar, queues, vectors, requested;
1340 	int err = 0;
1341 	int rdma;
1342 
1343 	/* Allocate the MSI-X bar */
1344 	bar = scctx->isc_msix_bar;
1345 	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
1346 	if (!sc->msix_table) {
1347 		device_printf(dev, "Unable to map MSI-X table\n");
1348 		return (ENOMEM);
1349 	}
1350 
1351 	/* Check if the iflib queue count sysctls have been set */
1352 	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
1353 		iflib_override_queue_count = true;
1354 
1355 	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
1356 	if (err) {
1357 		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
1358 			      __func__, ice_err_str(err));
1359 		CPU_COPY(&all_cpus, &cpus);
1360 	}
1361 
1362 	/* Attempt to mimic behavior of iflib_msix_init */
1363 	if (iflib_override_queue_count) {
1364 		/*
1365 		 * If the override sysctls have been set, limit the queues to
1366 		 * the number of logical CPUs.
1367 		 */
1368 		queues = mp_ncpus;
1369 	} else {
1370 		/*
1371 		 * Otherwise, limit the queue count to the CPUs associated
1372 		 * with the NUMA node the device is associated with.
1373 		 */
1374 		queues = CPU_COUNT(&cpus);
1375 	}
1376 
1377 	/* Clamp to the number of RSS buckets */
1378 	queues = imin(queues, rss_getnumbuckets());
1379 
1380 	/*
1381 	 * Clamp the number of queue pairs to the minimum of the requested Tx
1382 	 * and Rx queues.
1383 	 */
1384 	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
1385 	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
1386 
1387 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) {
1388 		/*
1389 		 * Choose a number of RDMA vectors based on the number of CPUs
1390 		 * up to a maximum
1391 		 */
1392 		rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX);
1393 
1394 		/* Further limit by the user configurable tunable */
1395 		rdma = min(rdma, ice_rdma_max_msix);
1396 	} else {
1397 		rdma = 0;
1398 	}
1399 
1400 	/*
1401 	 * Determine the number of vectors to request. Note that we also need
1402 	 * to allocate one vector for administrative tasks.
1403 	 */
1404 	requested = rdma + queues + 1;
1405 
1406 	vectors = requested;
1407 
1408 	err = pci_alloc_msix(dev, &vectors);
1409 	if (err) {
1410 		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
1411 			      vectors, ice_err_str(err));
1412 		goto err_free_msix_table;
1413 	}
1414 
1415 	/* If we don't receive enough vectors, reduce demands */
1416 	if (vectors < requested) {
1417 		int diff = requested - vectors;
1418 
1419 		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
1420 			      requested, vectors);
1421 
1422 		/*
1423 		 * The OS didn't grant us the requested number of vectors.
1424 		 * Check to see if we can reduce demands by limiting the
1425 		 * number of vectors allocated to certain features.
1426 		 */
1427 
1428 		if (rdma >= diff) {
1429 			/* Reduce the number of RDMA vectors we reserve */
1430 			rdma -= diff;
1431 			diff = 0;
1432 		} else {
1433 			/* Disable RDMA and reduce the difference */
1434 			ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
1435 			diff -= rdma;
1436 			rdma = 0;
1437 		}
1438 
1439 		/*
1440 		 * If we still have a difference, we need to reduce the number
1441 		 * of queue pairs.
1442 		 *
1443 		 * However, we still need at least one vector for the admin
1444 		 * interrupt and one queue pair.
1445 		 */
1446 		if (queues <= diff) {
1447 			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
1448 			err = (ERANGE);
1449 			goto err_pci_release_msi;
1450 		}
1451 
1452 		queues -= diff;
1453 	}
1454 
1455 	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
1456 	if (rdma)
1457 		device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n",
1458 			      rdma);
1459 	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
1460 		      vectors);
1461 
1462 	scctx->isc_vectors = vectors;
1463 	scctx->isc_nrxqsets = queues;
1464 	scctx->isc_ntxqsets = queues;
1465 	scctx->isc_intr = IFLIB_INTR_MSIX;
1466 
1467 	sc->irdma_vectors = rdma;
1468 
1469 	/* Interrupt allocation tracking isn't required in recovery mode,
1470 	 * since neither RDMA nor VFs are enabled.
1471 	 */
1472 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1473 		return (0);
1474 
1475 	/* Keep track of which interrupt indices are being used for what */
1476 	sc->lan_vectors = vectors - rdma;
1477 	err = ice_resmgr_assign_contiguous(&sc->imgr, sc->pf_imap, sc->lan_vectors);
1478 	if (err) {
1479 		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
1480 			      ice_err_str(err));
1481 		goto err_pci_release_msi;
1482 	}
1483 	err = ice_resmgr_assign_contiguous(&sc->imgr, sc->rdma_imap, rdma);
1484 	if (err) {
1485 		device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n",
1486 			      ice_err_str(err));
1487 		ice_resmgr_release_map(&sc->imgr, sc->pf_imap,
1488 					    sc->lan_vectors);
1489 		goto err_pci_release_msi;
1490 	}
1491 
1492 	return (0);
1493 
1494 err_pci_release_msi:
1495 	pci_release_msi(dev);
1496 err_free_msix_table:
1497 	if (sc->msix_table != NULL) {
1498 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
1499 				rman_get_rid(sc->msix_table),
1500 				sc->msix_table);
1501 		sc->msix_table = NULL;
1502 	}
1503 
1504 	return (err);
1505 }
1506 
1507 /**
1508  * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
1509  * @ctx: the iflib context structure
1510  * @msix: the number of vectors we were assigned
1511  *
1512  * Called by iflib to assign MSI-X vectors to queues. Currently requires that
1513  * we get at least the same number of vectors as we have queues, and that we
1514  * always have the same number of Tx and Rx queues.
1515  *
1516  * Tx queues use a softirq instead of using their own hardware interrupt.
1517  */
1518 static int
1519 ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
1520 {
1521 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1522 	struct ice_vsi *vsi = &sc->pf_vsi;
1523 	int err, i, vector;
1524 
1525 	ASSERT_CTX_LOCKED(sc);
1526 
1527 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
1528 		device_printf(sc->dev,
1529 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
1530 			      vsi->num_tx_queues, vsi->num_rx_queues);
1531 		return (EOPNOTSUPP);
1532 	}
1533 
1534 	if (msix < (vsi->num_rx_queues + 1)) {
1535 		device_printf(sc->dev,
1536 			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
1537 		return (EOPNOTSUPP);
1538 	}
1539 
1540 	/* Save the number of vectors for future use */
1541 	sc->num_irq_vectors = vsi->num_rx_queues + 1;
1542 
1543 	/* Allocate space to store the IRQ vector data */
1544 	if (!(sc->irqvs =
1545 	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
1546 					       M_ICE, M_NOWAIT))) {
1547 		device_printf(sc->dev,
1548 			      "Unable to allocate irqv memory\n");
1549 		return (ENOMEM);
1550 	}
1551 
1552 	/* Administrative interrupt events will use vector 0 */
1553 	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
1554 				      ice_msix_admin, sc, 0, "admin");
1555 	if (err) {
1556 		device_printf(sc->dev,
1557 			      "Failed to register Admin queue handler: %s\n",
1558 			      ice_err_str(err));
1559 		goto free_irqvs;
1560 	}
1561 	sc->irqvs[0].me = 0;
1562 
1563 	/* Do not allocate queue interrupts when in recovery mode */
1564 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1565 		return (0);
1566 
1567 	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
1568 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1569 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1570 		int rid = vector + 1;
1571 		char irq_name[16];
1572 
1573 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
1574 		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
1575 					      IFLIB_INTR_RXTX, ice_msix_que,
1576 					      rxq, rxq->me, irq_name);
1577 		if (err) {
1578 			device_printf(sc->dev,
1579 				      "Failed to allocate q int %d err: %s\n",
1580 				      i, ice_err_str(err));
1581 			vector--;
1582 			i--;
1583 			goto fail;
1584 		}
1585 		sc->irqvs[vector].me = vector;
1586 		rxq->irqv = &sc->irqvs[vector];
1587 
1588 		bzero(irq_name, sizeof(irq_name));
1589 
1590 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
1591 		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
1592 					    IFLIB_INTR_TX, txq,
1593 					    txq->me, irq_name);
1594 		txq->irqv = &sc->irqvs[vector];
1595 	}
1596 
1597 	return (0);
1598 fail:
1599 	for (; i >= 0; i--, vector--)
1600 		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
1601 	iflib_irq_free(ctx, &sc->irqvs[0].irq);
1602 free_irqvs:
1603 	free(sc->irqvs, M_ICE);
1604 	sc->irqvs = NULL;
1605 	return err;
1606 }
1607 
1608 /**
1609  * ice_if_mtu_set - Set the device MTU
1610  * @ctx: iflib context structure
1611  * @mtu: the MTU requested
1612  *
1613  * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
1614  *
1615  * @pre assumes the caller holds the iflib CTX lock
1616  */
1617 static int
1618 ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
1619 {
1620 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1621 
1622 	ASSERT_CTX_LOCKED(sc);
1623 
1624 	/* Do not support configuration when in recovery mode */
1625 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1626 		return (ENOSYS);
1627 
1628 	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
1629 		return (EINVAL);
1630 
1631 	sc->scctx->isc_max_frame_size = mtu +
1632 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1633 
1634 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
1635 
1636 	return (0);
1637 }
1638 
1639 /**
1640  * ice_if_intr_enable - Enable device interrupts
1641  * @ctx: iflib context structure
1642  *
1643  * Called by iflib to request enabling device interrupts.
1644  */
1645 static void
1646 ice_if_intr_enable(if_ctx_t ctx)
1647 {
1648 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1649 	struct ice_vsi *vsi = &sc->pf_vsi;
1650 	struct ice_hw *hw = &sc->hw;
1651 
1652 	ASSERT_CTX_LOCKED(sc);
1653 
1654 	/* Enable ITR 0 */
1655 	ice_enable_intr(hw, sc->irqvs[0].me);
1656 
1657 	/* Do not enable queue interrupts in recovery mode */
1658 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1659 		return;
1660 
1661 	/* Enable all queue interrupts */
1662 	for (int i = 0; i < vsi->num_rx_queues; i++)
1663 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
1664 }
1665 
1666 /**
1667  * ice_if_intr_disable - Disable device interrupts
1668  * @ctx: iflib context structure
1669  *
1670  * Called by iflib to request disabling device interrupts.
1671  */
1672 static void
1673 ice_if_intr_disable(if_ctx_t ctx)
1674 {
1675 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1676 	struct ice_hw *hw = &sc->hw;
1677 	unsigned int i;
1678 
1679 	ASSERT_CTX_LOCKED(sc);
1680 
1681 	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
1682 	 * assigned to queues. Instead of assuming that the interrupt
1683 	 * assignment in the rx_queues structure is valid, just disable all
1684 	 * possible interrupts
1685 	 *
1686 	 * Note that we choose not to disable ITR 0 because this handles the
1687 	 * AdminQ interrupts, and we want to keep processing these even when
1688 	 * the interface is offline.
1689 	 */
1690 	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1691 		ice_disable_intr(hw, i);
1692 }
1693 
1694 /**
1695  * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1696  * @ctx: iflib context structure
1697  * @rxqid: the Rx queue to enable
1698  *
1699  * Enable a specific Rx queue interrupt.
1700  *
1701  * This function is not protected by the iflib CTX lock.
1702  */
1703 static int
1704 ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1705 {
1706 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1707 	struct ice_vsi *vsi = &sc->pf_vsi;
1708 	struct ice_hw *hw = &sc->hw;
1709 
1710 	/* Do not enable queue interrupts in recovery mode */
1711 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1712 		return (ENOSYS);
1713 
1714 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1715 	return (0);
1716 }
1717 
1718 /**
1719  * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1720  * @ctx: iflib context structure
1721  * @txqid: the Tx queue to enable
1722  *
1723  * Enable a specific Tx queue interrupt.
1724  *
1725  * This function is not protected by the iflib CTX lock.
1726  */
1727 static int
1728 ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1729 {
1730 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1731 	struct ice_vsi *vsi = &sc->pf_vsi;
1732 	struct ice_hw *hw = &sc->hw;
1733 
1734 	/* Do not enable queue interrupts in recovery mode */
1735 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1736 		return (ENOSYS);
1737 
1738 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1739 	return (0);
1740 }
1741 
1742 /**
1743  * ice_if_promisc_set - Set device promiscuous mode
1744  * @ctx: iflib context structure
1745  * @flags: promiscuous flags to configure
1746  *
1747  * Called by iflib to configure device promiscuous mode.
1748  *
1749  * @remark Calls to this function will always overwrite the previous setting
1750  */
1751 static int
1752 ice_if_promisc_set(if_ctx_t ctx, int flags)
1753 {
1754 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1755 	struct ice_hw *hw = &sc->hw;
1756 	device_t dev = sc->dev;
1757 	enum ice_status status;
1758 	bool promisc_enable = flags & IFF_PROMISC;
1759 	bool multi_enable = flags & IFF_ALLMULTI;
1760 
1761 	/* Do not support configuration when in recovery mode */
1762 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1763 		return (ENOSYS);
1764 
1765 	if (multi_enable)
1766 		return (EOPNOTSUPP);
1767 
1768 	if (promisc_enable) {
1769 		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1770 					     ICE_VSI_PROMISC_MASK, 0);
1771 		if (status && status != ICE_ERR_ALREADY_EXISTS) {
1772 			device_printf(dev,
1773 				      "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n",
1774 				      ice_status_str(status),
1775 				      ice_aq_str(hw->adminq.sq_last_status));
1776 			return (EIO);
1777 		}
1778 	} else {
1779 		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
1780 					       ICE_VSI_PROMISC_MASK, 0);
1781 		if (status) {
1782 			device_printf(dev,
1783 				      "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n",
1784 				      ice_status_str(status),
1785 				      ice_aq_str(hw->adminq.sq_last_status));
1786 			return (EIO);
1787 		}
1788 	}
1789 
1790 	return (0);
1791 }
1792 
1793 /**
1794  * ice_if_media_change - Change device media
1795  * @ctx: device ctx structure
1796  *
1797  * Called by iflib when a media change is requested. This operation is not
1798  * supported by the hardware, so we just return an error code.
1799  */
1800 static int
1801 ice_if_media_change(if_ctx_t ctx)
1802 {
1803 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1804 
1805 	device_printf(sc->dev, "Media change is not supported.\n");
1806 	return (ENODEV);
1807 }
1808 
1809 /**
1810  * ice_if_media_status - Report current device media
1811  * @ctx: iflib context structure
1812  * @ifmr: ifmedia request structure to update
1813  *
1814  * Updates the provided ifmr with current device media status, including link
1815  * status and media type.
1816  */
1817 static void
1818 ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1819 {
1820 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1821 	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;
1822 
1823 	ifmr->ifm_status = IFM_AVALID;
1824 	ifmr->ifm_active = IFM_ETHER;
1825 
1826 	/* Never report link up or media types when in recovery mode */
1827 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1828 		return;
1829 
1830 	if (!sc->link_up)
1831 		return;
1832 
1833 	ifmr->ifm_status |= IFM_ACTIVE;
1834 	ifmr->ifm_active |= IFM_FDX;
1835 
1836 	if (li->phy_type_low)
1837 		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
1838 	else if (li->phy_type_high)
1839 		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
1840 	else
1841 		ifmr->ifm_active |= IFM_UNKNOWN;
1842 
1843 	/* Report flow control status as well */
1844 	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
1845 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1846 	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
1847 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1848 }
1849 
1850 /**
1851  * ice_init_tx_tracking - Initialize Tx queue software tracking values
1852  * @vsi: the VSI to initialize
1853  *
1854  * Initialize Tx queue software tracking values, including the Report Status
1855  * queue, and related software tracking values.
1856  */
1857 static void
1858 ice_init_tx_tracking(struct ice_vsi *vsi)
1859 {
1860 	struct ice_tx_queue *txq;
1861 	size_t j;
1862 	int i;
1863 
1864 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1865 
1866 		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;
1867 
1868 		/* Initialize the last processed descriptor to be the end of
1869 		 * the ring, rather than the start, so that we avoid an
1870 		 * off-by-one error in ice_ift_txd_credits_update for the
1871 		 * first packet.
1872 		 */
1873 		txq->tx_cidx_processed = txq->desc_count - 1;
1874 
1875 		for (j = 0; j < txq->desc_count; j++)
1876 			txq->tx_rsq[j] = QIDX_INVALID;
1877 	}
1878 }
1879 
1880 /**
1881  * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
1882  * @sc: the device softc
1883  *
1884  * Called to update the Rx queue mbuf_sz parameter for configuring the receive
1885  * buffer sizes when programming hardware.
1886  */
1887 static void
1888 ice_update_rx_mbuf_sz(struct ice_softc *sc)
1889 {
1890 	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
1891 	struct ice_vsi *vsi = &sc->pf_vsi;
1892 
1893 	MPASS(mbuf_sz <= UINT16_MAX);
1894 	vsi->mbuf_sz = mbuf_sz;
1895 }
1896 
1897 /**
1898  * ice_if_init - Initialize the device
1899  * @ctx: iflib ctx structure
1900  *
1901  * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
1902  * device filters and prepares the Tx and Rx engines.
1903  *
1904  * @pre assumes the caller holds the iflib CTX lock
1905  */
1906 static void
1907 ice_if_init(if_ctx_t ctx)
1908 {
1909 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1910 	device_t dev = sc->dev;
1911 	int err;
1912 
1913 	ASSERT_CTX_LOCKED(sc);
1914 
1915 	/*
1916 	 * We've seen an issue with 11.3/12.1 where sideband routines are
1917 	 * called after detach is called.  This would call routines after
1918 	 * if_stop, causing issues with the teardown process.  This has
1919 	 * seemingly been fixed in STABLE snapshots, but it seems like a
1920 	 * good idea to have this guard here regardless.
1921 	 */
1922 	if (ice_driver_is_detaching(sc))
1923 		return;
1924 
1925 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1926 		return;
1927 
1928 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
1929 		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
1930 		return;
1931 	}
1932 
1933 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
1934 		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
1935 		return;
1936 	}
1937 
1938 	ice_update_rx_mbuf_sz(sc);
1939 
1940 	/* Update the MAC address... User might use a LAA */
1941 	err = ice_update_laa_mac(sc);
1942 	if (err) {
1943 		device_printf(dev,
1944 			      "LAA address change failed, err %s\n",
1945 			      ice_err_str(err));
1946 		return;
1947 	}
1948 
1949 	/* Initialize software Tx tracking values */
1950 	ice_init_tx_tracking(&sc->pf_vsi);
1951 
1952 	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
1953 	if (err) {
1954 		device_printf(dev,
1955 			      "Unable to configure the main VSI for Tx: %s\n",
1956 			      ice_err_str(err));
1957 		return;
1958 	}
1959 
1960 	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
1961 	if (err) {
1962 		device_printf(dev,
1963 			      "Unable to configure the main VSI for Rx: %s\n",
1964 			      ice_err_str(err));
1965 		goto err_cleanup_tx;
1966 	}
1967 
1968 	err = ice_control_rx_queues(&sc->pf_vsi, true);
1969 	if (err) {
1970 		device_printf(dev,
1971 			      "Unable to enable Rx rings for transmit: %s\n",
1972 			      ice_err_str(err));
1973 		goto err_cleanup_tx;
1974 	}
1975 
1976 	err = ice_cfg_pf_default_mac_filters(sc);
1977 	if (err) {
1978 		device_printf(dev,
1979 			      "Unable to configure default MAC filters: %s\n",
1980 			      ice_err_str(err));
1981 		goto err_stop_rx;
1982 	}
1983 
1984 	/* We use software interrupts for Tx, so we only program the hardware
1985 	 * interrupts for Rx.
1986 	 */
1987 	ice_configure_rxq_interrupts(&sc->pf_vsi);
1988 	ice_configure_rx_itr(&sc->pf_vsi);
1989 
1990 	/* Configure promiscuous mode */
1991 	ice_if_promisc_set(ctx, if_getflags(sc->ifp));
1992 
1993 	ice_rdma_pf_init(sc);
1994 
1995 	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
1996 	return;
1997 
1998 err_stop_rx:
1999 	ice_control_rx_queues(&sc->pf_vsi, false);
2000 err_cleanup_tx:
2001 	ice_vsi_disable_tx(&sc->pf_vsi);
2002 }
2003 
2004 /**
2005  * ice_poll_for_media_avail - Re-enable link if media is detected
2006  * @sc: device private structure
2007  *
2008  * Intended to be called from the driver's timer function, this function
2009  * sends the Get Link Status AQ command and re-enables HW link if the
2010  * command says that media is available.
2011  *
2012  * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing,
2013  * since media removal events are supposed to be sent to the driver through
2014  * a link status event.
2015  */
2016 static void
2017 ice_poll_for_media_avail(struct ice_softc *sc)
2018 {
2019 	struct ice_hw *hw = &sc->hw;
2020 	struct ice_port_info *pi = hw->port_info;
2021 
2022 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2023 		pi->phy.get_link_info = true;
2024 		ice_get_link_status(pi, &sc->link_up);
2025 
2026 		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2027 			enum ice_status status;
2028 
2029 			/* Re-enable link and re-apply user link settings */
2030 			ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
2031 
2032 			/* Update the OS about changes in media capability */
2033 			status = ice_add_media_types(sc, sc->media);
2034 			if (status)
2035 				device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
2036 					      ice_status_str(status),
2037 					      ice_aq_str(hw->adminq.sq_last_status));
2038 
2039 			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
2040 		}
2041 	}
2042 }
2043 
2044 /**
2045  * ice_if_timer - called by iflib periodically
2046  * @ctx: iflib ctx structure
2047  * @qid: the queue this timer was called for
2048  *
2049  * This callback is triggered by iflib periodically. We use it to update the
2050  * hw statistics.
2051  *
2052  * @remark this function is not protected by the iflib CTX lock.
2053  */
2054 static void
2055 ice_if_timer(if_ctx_t ctx, uint16_t qid)
2056 {
2057 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2058 	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;
2059 
2060 	if (qid != 0)
2061 		return;
2062 
2063 	/* Do not attempt to update stats when in recovery mode */
2064 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2065 		return;
2066 
2067 	/* Update device statistics */
2068 	ice_update_pf_stats(sc);
2069 
2070 	/*
2071 	 * For proper watchdog management, the iflib stack needs to know if
2072 	 * we've been paused during the last interval. Check if the
2073 	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
2074 	 */
2075 	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
2076 		sc->scctx->isc_pause_frames = 1;
2077 
2078 	/* Update the primary VSI stats */
2079 	ice_update_vsi_hw_stats(&sc->pf_vsi);
2080 }
2081 
2082 /**
2083  * ice_admin_timer - called periodically to trigger the admin task
2084  * @arg: callout(9) argument pointing to the device private softc structure
2085  *
2086  * Timer function used as part of a callout(9) timer that will periodically
2087  * trigger the admin task, even when the interface is down.
2088  *
2089  * @remark this function is not called by iflib and is not protected by the
2090  * iflib CTX lock.
2091  *
2092  * @remark because this is a callout function, it cannot sleep and should not
2093  * attempt taking the iflib CTX lock.
2094  */
2095 static void
2096 ice_admin_timer(void *arg)
2097 {
2098 	struct ice_softc *sc = (struct ice_softc *)arg;
2099 
2100 	/*
2101 	 * There is a point where callout routines are no longer
2102 	 * cancelable.  So there exists a window of time where the
2103 	 * driver enters detach() and tries to cancel the callout, but the
2104 	 * callout routine has passed the cancellation point.  The detach()
2105 	 * routine is unaware of this and tries to free resources that the
2106 	 * callout routine needs.  So we check for the detach state flag to
2107 	 * at least shrink the window of opportunity.
2108 	 */
2109 	if (ice_driver_is_detaching(sc))
2110 		return;
2111 
2112 	/* Fire off the admin task */
2113 	iflib_admin_intr_deferred(sc->ctx);
2114 
2115 	/* Reschedule the admin timer */
2116 	callout_schedule(&sc->admin_timer, hz/2);
2117 }
2118 
2119 /**
2120  * ice_transition_recovery_mode - Transition to recovery mode
2121  * @sc: the device private softc
2122  *
2123  * Called when the driver detects that the firmware has entered recovery mode
2124  * at run time.
2125  */
2126 static void
2127 ice_transition_recovery_mode(struct ice_softc *sc)
2128 {
2129 	struct ice_vsi *vsi = &sc->pf_vsi;
2130 	int i;
2131 
2132 	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
2133 
2134 	/* Tell the stack that the link has gone down */
2135 	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
2136 
2137 	/* Request that the device be re-initialized */
2138 	ice_request_stack_reinit(sc);
2139 
2140 	ice_rdma_pf_detach(sc);
2141 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2142 
2143 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2144 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2145 
2146 	ice_vsi_del_txqs_ctx(vsi);
2147 	ice_vsi_del_rxqs_ctx(vsi);
2148 
2149 	for (i = 0; i < sc->num_available_vsi; i++) {
2150 		if (sc->all_vsi[i])
2151 			ice_release_vsi(sc->all_vsi[i]);
2152 	}
2153 	sc->num_available_vsi = 0;
2154 
2155 	if (sc->all_vsi) {
2156 		free(sc->all_vsi, M_ICE);
2157 		sc->all_vsi = NULL;
2158 	}
2159 
2160 	/* Destroy the interrupt manager */
2161 	ice_resmgr_destroy(&sc->imgr);
2162 	/* Destroy the queue managers */
2163 	ice_resmgr_destroy(&sc->tx_qmgr);
2164 	ice_resmgr_destroy(&sc->rx_qmgr);
2165 
2166 	ice_deinit_hw(&sc->hw);
2167 }
2168 
2169 /**
2170  * ice_transition_safe_mode - Transition to safe mode
2171  * @sc: the device private softc
2172  *
2173  * Called when the driver attempts to reload the DDP package during a device
2174  * reset, and the new download fails. If so, we must transition to safe mode
2175  * at run time.
2176  *
2177  * @remark although safe mode normally allocates only a single queue, we can't
2178  * change the number of queues dynamically when using iflib. Due to this, we
2179  * do not attempt to reduce the number of queues.
2180  */
2181 static void
2182 ice_transition_safe_mode(struct ice_softc *sc)
2183 {
2184 	/* Indicate that we are in Safe mode */
2185 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
2186 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
2187 
2188 	ice_rdma_pf_detach(sc);
2189 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2190 
2191 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2192 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2193 
2194 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2195 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
2196 }
2197 
2198 /**
2199  * ice_if_update_admin_status - update admin status
2200  * @ctx: iflib ctx structure
2201  *
2202  * Called by iflib to update the admin status. For our purposes, this means
2203  * check the adminq, and update the link status. It's ultimately triggered by
2204  * our admin interrupt, or by the ice_if_timer periodically.
2205  *
2206  * @pre assumes the caller holds the iflib CTX lock
2207  */
2208 static void
2209 ice_if_update_admin_status(if_ctx_t ctx)
2210 {
2211 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2212 	enum ice_fw_modes fw_mode;
2213 	bool reschedule = false;
2214 	u16 pending = 0;
2215 
2216 	ASSERT_CTX_LOCKED(sc);
2217 
2218 	/* Check if the firmware entered recovery mode at run time */
2219 	fw_mode = ice_get_fw_mode(&sc->hw);
2220 	if (fw_mode == ICE_FW_MODE_REC) {
2221 		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2222 			/* If we just entered recovery mode, log a warning to
2223 			 * the system administrator and deinit driver state
2224 			 * that is no longer functional.
2225 			 */
2226 			ice_transition_recovery_mode(sc);
2227 		}
2228 	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
2229 		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
2230 			/* Rollback mode isn't fatal, but we don't want to
2231 			 * repeatedly post a message about it.
2232 			 */
2233 			ice_print_rollback_msg(&sc->hw);
2234 		}
2235 	}
2236 
2237 	/* Handle global reset events */
2238 	ice_handle_reset_event(sc);
2239 
2240 	/* Handle PF reset requests */
2241 	ice_handle_pf_reset_request(sc);
2242 
2243 	/* Handle MDD events */
2244 	ice_handle_mdd_event(sc);
2245 
2246 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
2247 	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
2248 	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2249 		/*
2250 		 * If we know the control queues are disabled, skip processing
2251 		 * the control queues entirely.
2252 		 */
2253 		;
2254 	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
2255 		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
2256 		if (pending > 0)
2257 			reschedule = true;
2258 
2259 		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
2260 		if (pending > 0)
2261 			reschedule = true;
2262 	}
2263 
2264 	/* Poll for link up */
2265 	ice_poll_for_media_avail(sc);
2266 
2267 	/* Check and update link status */
2268 	ice_update_link_status(sc, false);
2269 
2270 	/*
2271 	 * If there are still messages to process, we need to reschedule
2272 	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
2273 	 * woken up at the next interrupt or timer event.
2274 	 */
2275 	if (reschedule) {
2276 		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
2277 		iflib_admin_intr_deferred(ctx);
2278 	} else {
2279 		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2280 	}
2281 }
2282 
2283 /**
2284  * ice_prepare_for_reset - Prepare device for an impending reset
2285  * @sc: The device private softc
2286  *
2287  * Prepare the driver for an impending reset, shutting down VSIs, clearing the
2288  * scheduler setup, and shutting down controlqs. Uses the
2289  * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
2290  * driver for reset or not.
2291  */
2292 static void
2293 ice_prepare_for_reset(struct ice_softc *sc)
2294 {
2295 	struct ice_hw *hw = &sc->hw;
2296 
2297 	/* If we're already prepared, there's nothing to do */
2298 	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
2299 		return;
2300 
2301 	log(LOG_INFO, "%s: preparing to reset device logic\n", if_name(sc->ifp));
2302 
2303 	/* In recovery mode, hardware is not initialized */
2304 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2305 		return;
2306 
2307 	/* stop the RDMA client */
2308 	ice_rdma_pf_stop(sc);
2309 
2310 	/* Release the main PF VSI queue mappings */
2311 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2312 				    sc->pf_vsi.num_tx_queues);
2313 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2314 				    sc->pf_vsi.num_rx_queues);
2315 
2316 	ice_clear_hw_tbls(hw);
2317 
2318 	if (hw->port_info)
2319 		ice_sched_clear_port(hw->port_info);
2320 
2321 	ice_shutdown_all_ctrlq(hw, false);
2322 }
2323 
2324 /**
2325  * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
2326  * @sc: the device softc pointer
2327  *
2328  * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
2329  * mapping after a reset occurred.
2330  */
2331 static int
2332 ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
2333 {
2334 	struct ice_vsi *vsi = &sc->pf_vsi;
2335 	struct ice_tx_queue *txq;
2336 	struct ice_rx_queue *rxq;
2337 	int err, i;
2338 
2339 	/* Re-assign Tx queues from PF space to the main VSI */
2340 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
2341 					    vsi->num_tx_queues);
2342 	if (err) {
2343 		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
2344 			      ice_err_str(err));
2345 		return (err);
2346 	}
2347 
2348 	/* Re-assign Rx queues from PF space to this VSI */
2349 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
2350 					    vsi->num_rx_queues);
2351 	if (err) {
2352 		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
2353 			      ice_err_str(err));
2354 		goto err_release_tx_queues;
2355 	}
2356 
2357 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
2358 
2359 	/* Re-assign Tx queue tail pointers */
2360 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
2361 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
2362 
2363 	/* Re-assign Rx queue tail pointers */
2364 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
2365 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
2366 
2367 	return (0);
2368 
2369 err_release_tx_queues:
2370 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2371 				   sc->pf_vsi.num_tx_queues);
2372 
2373 	return (err);
2374 }
2375 
/*
 * Determine if the iflib context is active: evaluates to non-zero when the
 * context's ifnet has the IFF_DRV_RUNNING driver flag set.
 */
#define CTX_ACTIVE(ctx) \
	(if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
2378 
2379 /**
2380  * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
2381  * @sc: The device private softc
2382  *
2383  * Handle a driver rebuild while in recovery mode. This will only rebuild the
2384  * limited functionality supported while in recovery mode.
2385  */
2386 static void
2387 ice_rebuild_recovery_mode(struct ice_softc *sc)
2388 {
2389 	device_t dev = sc->dev;
2390 
2391 	/* enable PCIe bus master */
2392 	pci_enable_busmaster(dev);
2393 
2394 	/* Configure interrupt causes for the administrative interrupt */
2395 	ice_configure_misc_interrupts(sc);
2396 
2397 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2398 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2399 
2400 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2401 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2402 
2403 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2404 
2405 	/* In order to completely restore device functionality, the iflib core
2406 	 * needs to be reset. We need to request an iflib reset. Additionally,
2407 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2408 	 * the iflib core, we also want re-run the admin task so that iflib
2409 	 * resets immediately instead of waiting for the next interrupt.
2410 	 */
2411 	ice_request_stack_reinit(sc);
2412 
2413 	return;
2414 }
2415 
2416 /**
2417  * ice_rebuild - Rebuild driver state post reset
2418  * @sc: The device private softc
2419  *
2420  * Restore driver state after a reset occurred. Restart the controlqs, setup
2421  * the hardware port, and re-enable the VSIs.
2422  */
2423 static void
2424 ice_rebuild(struct ice_softc *sc)
2425 {
2426 	struct ice_hw *hw = &sc->hw;
2427 	device_t dev = sc->dev;
2428 	enum ice_ddp_state pkg_state;
2429 	enum ice_status status;
2430 	int err;
2431 
2432 	sc->rebuild_ticks = ticks;
2433 
2434 	/* If we're rebuilding, then a reset has succeeded. */
2435 	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);
2436 
2437 	/*
2438 	 * If the firmware is in recovery mode, only restore the limited
2439 	 * functionality supported by recovery mode.
2440 	 */
2441 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2442 		ice_rebuild_recovery_mode(sc);
2443 		return;
2444 	}
2445 
2446 	/* enable PCIe bus master */
2447 	pci_enable_busmaster(dev);
2448 
2449 	status = ice_init_all_ctrlq(hw);
2450 	if (status) {
2451 		device_printf(dev, "failed to re-init controlqs, err %s\n",
2452 			      ice_status_str(status));
2453 		goto err_shutdown_ctrlq;
2454 	}
2455 
2456 	/* Query the allocated resources for Tx scheduler */
2457 	status = ice_sched_query_res_alloc(hw);
2458 	if (status) {
2459 		device_printf(dev,
2460 			      "Failed to query scheduler resources, err %s aq_err %s\n",
2461 			      ice_status_str(status),
2462 			      ice_aq_str(hw->adminq.sq_last_status));
2463 		goto err_shutdown_ctrlq;
2464 	}
2465 
2466 	/* Re-enable FW logging. Keep going even if this fails */
2467 	status = ice_fwlog_set(hw, &hw->fwlog_cfg);
2468 	if (!status) {
2469 		/*
2470 		 * We should have the most updated cached copy of the
2471 		 * configuration, regardless of whether we're rebuilding
2472 		 * or not.  So we'll simply check to see if logging was
2473 		 * enabled pre-rebuild.
2474 		 */
2475 		if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2476 			status = ice_fwlog_register(hw);
2477 			if (status)
2478 				device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n",
2479 				   ice_status_str(status),
2480 				   ice_aq_str(hw->adminq.sq_last_status));
2481 		}
2482 	} else
2483 		device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n",
2484 		   ice_status_str(status),
2485 		   ice_aq_str(hw->adminq.sq_last_status));
2486 
2487 	err = ice_send_version(sc);
2488 	if (err)
2489 		goto err_shutdown_ctrlq;
2490 
2491 	err = ice_init_link_events(sc);
2492 	if (err) {
2493 		device_printf(dev, "ice_init_link_events failed: %s\n",
2494 			      ice_err_str(err));
2495 		goto err_shutdown_ctrlq;
2496 	}
2497 
2498 	status = ice_clear_pf_cfg(hw);
2499 	if (status) {
2500 		device_printf(dev, "failed to clear PF configuration, err %s\n",
2501 			      ice_status_str(status));
2502 		goto err_shutdown_ctrlq;
2503 	}
2504 
2505 	ice_clear_pxe_mode(hw);
2506 
2507 	status = ice_get_caps(hw);
2508 	if (status) {
2509 		device_printf(dev, "failed to get capabilities, err %s\n",
2510 			      ice_status_str(status));
2511 		goto err_shutdown_ctrlq;
2512 	}
2513 
2514 	status = ice_sched_init_port(hw->port_info);
2515 	if (status) {
2516 		device_printf(dev, "failed to initialize port, err %s\n",
2517 			      ice_status_str(status));
2518 		goto err_sched_cleanup;
2519 	}
2520 
2521 	/* If we previously loaded the package, it needs to be reloaded now */
2522 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
2523 		pkg_state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
2524 		if (!ice_is_init_pkg_successful(pkg_state)) {
2525 			ice_log_pkg_init(sc, pkg_state);
2526 			ice_transition_safe_mode(sc);
2527 		}
2528 	}
2529 
2530 	ice_reset_pf_stats(sc);
2531 
2532 	err = ice_rebuild_pf_vsi_qmap(sc);
2533 	if (err) {
2534 		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
2535 			      ice_err_str(err));
2536 		goto err_sched_cleanup;
2537 	}
2538 	err = ice_initialize_vsi(&sc->pf_vsi);
2539 	if (err) {
2540 		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
2541 			      ice_err_str(err));
2542 		goto err_release_queue_allocations;
2543 	}
2544 
2545 	/* Replay all VSI configuration */
2546 	err = ice_replay_all_vsi_cfg(sc);
2547 	if (err)
2548 		goto err_deinit_pf_vsi;
2549 
2550 	/* Re-enable FW health event reporting */
2551 	ice_init_health_events(sc);
2552 
2553 	/* Reconfigure the main PF VSI for RSS */
2554 	err = ice_config_rss(&sc->pf_vsi);
2555 	if (err) {
2556 		device_printf(sc->dev,
2557 			      "Unable to reconfigure RSS for the main VSI, err %s\n",
2558 			      ice_err_str(err));
2559 		goto err_deinit_pf_vsi;
2560 	}
2561 
2562 	/* Refresh link status */
2563 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2564 	sc->hw.port_info->phy.get_link_info = true;
2565 	ice_get_link_status(sc->hw.port_info, &sc->link_up);
2566 	ice_update_link_status(sc, true);
2567 
2568 	/* RDMA interface will be restarted by the stack re-init */
2569 
2570 	/* Configure interrupt causes for the administrative interrupt */
2571 	ice_configure_misc_interrupts(sc);
2572 
2573 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2574 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2575 
2576 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2577 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2578 
2579 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2580 
2581 	/* In order to completely restore device functionality, the iflib core
2582 	 * needs to be reset. We need to request an iflib reset. Additionally,
2583 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2584 	 * the iflib core, we also want re-run the admin task so that iflib
2585 	 * resets immediately instead of waiting for the next interrupt.
2586 	 */
2587 	ice_request_stack_reinit(sc);
2588 
2589 	return;
2590 
2591 err_deinit_pf_vsi:
2592 	ice_deinit_vsi(&sc->pf_vsi);
2593 err_release_queue_allocations:
2594 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2595 				    sc->pf_vsi.num_tx_queues);
2596 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2597 				    sc->pf_vsi.num_rx_queues);
2598 err_sched_cleanup:
2599 	ice_sched_cleanup_all(hw);
2600 err_shutdown_ctrlq:
2601 	ice_shutdown_all_ctrlq(hw, false);
2602 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2603 	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2604 	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
2605 }
2606 
2607 /**
2608  * ice_handle_reset_event - Handle reset events triggered by OICR
2609  * @sc: The device private softc
2610  *
2611  * Handle reset events triggered by an OICR notification. This includes CORER,
2612  * GLOBR, and EMPR resets triggered by software on this or any other PF or by
2613  * firmware.
2614  *
2615  * @pre assumes the iflib context lock is held, and will unlock it while
2616  * waiting for the hardware to finish reset.
2617  */
2618 static void
2619 ice_handle_reset_event(struct ice_softc *sc)
2620 {
2621 	struct ice_hw *hw = &sc->hw;
2622 	enum ice_status status;
2623 	device_t dev = sc->dev;
2624 
2625 	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
2626 	 * trigger an OICR interrupt. Our OICR handler will determine when
2627 	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
2628 	 * appropriate.
2629 	 */
2630 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
2631 		return;
2632 
2633 	ice_prepare_for_reset(sc);
2634 
2635 	/*
2636 	 * Release the iflib context lock and wait for the device to finish
2637 	 * resetting.
2638 	 */
2639 	IFLIB_CTX_UNLOCK(sc);
2640 	status = ice_check_reset(hw);
2641 	IFLIB_CTX_LOCK(sc);
2642 	if (status) {
2643 		device_printf(dev, "Device never came out of reset, err %s\n",
2644 			      ice_status_str(status));
2645 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2646 		return;
2647 	}
2648 
2649 	/* We're done with the reset, so we can rebuild driver state */
2650 	sc->hw.reset_ongoing = false;
2651 	ice_rebuild(sc);
2652 
2653 	/* In the unlikely event that a PF reset request occurs at the same
2654 	 * time as a global reset, clear the request now. This avoids
2655 	 * resetting a second time right after we reset due to a global event.
2656 	 */
2657 	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2658 		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
2659 }
2660 
2661 /**
2662  * ice_handle_pf_reset_request - Initiate PF reset requested by software
2663  * @sc: The device private softc
2664  *
2665  * Initiate a PF reset requested by software. We handle this in the admin task
2666  * so that only one thread actually handles driver preparation and cleanup,
2667  * rather than having multiple threads possibly attempt to run this code
2668  * simultaneously.
2669  *
2670  * @pre assumes the iflib context lock is held and will unlock it while
2671  * waiting for the PF reset to complete.
2672  */
2673 static void
2674 ice_handle_pf_reset_request(struct ice_softc *sc)
2675 {
2676 	struct ice_hw *hw = &sc->hw;
2677 	enum ice_status status;
2678 
2679 	/* Check for PF reset requests */
2680 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2681 		return;
2682 
2683 	/* Make sure we're prepared for reset */
2684 	ice_prepare_for_reset(sc);
2685 
2686 	/*
2687 	 * Release the iflib context lock and wait for the device to finish
2688 	 * resetting.
2689 	 */
2690 	IFLIB_CTX_UNLOCK(sc);
2691 	status = ice_reset(hw, ICE_RESET_PFR);
2692 	IFLIB_CTX_LOCK(sc);
2693 	if (status) {
2694 		device_printf(sc->dev, "device PF reset failed, err %s\n",
2695 			      ice_status_str(status));
2696 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2697 		return;
2698 	}
2699 
2700 	sc->soft_stats.pfr_count++;
2701 	ice_rebuild(sc);
2702 }
2703 
2704 /**
2705  * ice_init_device_features - Init device driver features
2706  * @sc: driver softc structure
2707  *
2708  * @pre assumes that the function capabilities bits have been set up by
2709  * ice_init_hw().
2710  */
2711 static void
2712 ice_init_device_features(struct ice_softc *sc)
2713 {
2714 	/* Set capabilities that all devices support */
2715 	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2716 	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
2717 	ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2718 	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
2719 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
2720 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
2721 	ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2722 	ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2723 	ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap);
2724 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_cap);
2725 	ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2726 
2727 	/* Disable features due to hardware limitations... */
2728 	if (!sc->hw.func_caps.common_cap.rss_table_size)
2729 		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2730 	if (!sc->hw.func_caps.common_cap.iwarp || !ice_enable_irdma)
2731 		ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2732 	if (!sc->hw.func_caps.common_cap.dcb)
2733 		ice_clear_bit(ICE_FEATURE_DCB, sc->feat_cap);
2734 	/* Disable features due to firmware limitations... */
2735 	if (!ice_is_fw_health_report_supported(&sc->hw))
2736 		ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2737 	if (!ice_fwlog_supported(&sc->hw))
2738 		ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2739 	if (sc->hw.fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2740 		if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING))
2741 			ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en);
2742 		else
2743 			ice_fwlog_unregister(&sc->hw);
2744 	}
2745 
2746 	/* Disable capabilities not supported by the OS */
2747 	ice_disable_unsupported_features(sc->feat_cap);
2748 
2749 	/* RSS is always enabled for iflib */
2750 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
2751 		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
2752 
2753 	/* Disable features based on sysctl settings */
2754 	if (!ice_tx_balance_en)
2755 		ice_clear_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2756 }
2757 
2758 /**
2759  * ice_if_multi_set - Callback to update Multicast filters in HW
2760  * @ctx: iflib ctx structure
2761  *
2762  * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
2763  * the if_multiaddrs list and determine which filters have been added or
2764  * removed from the list, and update HW programming to reflect the new list.
2765  *
2766  * @pre assumes the caller holds the iflib CTX lock
2767  */
2768 static void
2769 ice_if_multi_set(if_ctx_t ctx)
2770 {
2771 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2772 	int err;
2773 
2774 	ASSERT_CTX_LOCKED(sc);
2775 
2776 	/* Do not handle multicast configuration in recovery mode */
2777 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2778 		return;
2779 
2780 	err = ice_sync_multicast_filters(sc);
2781 	if (err) {
2782 		device_printf(sc->dev,
2783 			      "Failed to synchronize multicast filter list: %s\n",
2784 			      ice_err_str(err));
2785 		return;
2786 	}
2787 }
2788 
2789 /**
2790  * ice_if_vlan_register - Register a VLAN with the hardware
2791  * @ctx: iflib ctx pointer
2792  * @vtag: VLAN to add
2793  *
2794  * Programs the main PF VSI with a hardware filter for the given VLAN.
2795  *
2796  * @pre assumes the caller holds the iflib CTX lock
2797  */
2798 static void
2799 ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
2800 {
2801 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2802 	enum ice_status status;
2803 
2804 	ASSERT_CTX_LOCKED(sc);
2805 
2806 	/* Do not handle VLAN configuration in recovery mode */
2807 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2808 		return;
2809 
2810 	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
2811 	if (status) {
2812 		device_printf(sc->dev,
2813 			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
2814 			      vtag, ice_status_str(status),
2815 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2816 	}
2817 }
2818 
2819 /**
2820  * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
2821  * @ctx: iflib ctx pointer
2822  * @vtag: VLAN to add
2823  *
2824  * Removes the previously programmed VLAN filter from the main PF VSI.
2825  *
2826  * @pre assumes the caller holds the iflib CTX lock
2827  */
2828 static void
2829 ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
2830 {
2831 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2832 	enum ice_status status;
2833 
2834 	ASSERT_CTX_LOCKED(sc);
2835 
2836 	/* Do not handle VLAN configuration in recovery mode */
2837 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2838 		return;
2839 
2840 	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
2841 	if (status) {
2842 		device_printf(sc->dev,
2843 			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
2844 			      vtag, ice_status_str(status),
2845 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2846 	}
2847 }
2848 
2849 /**
2850  * ice_if_stop - Stop the device
2851  * @ctx: iflib context structure
2852  *
2853  * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
2854  * down)
2855  *
2856  * @pre assumes the caller holds the iflib CTX lock
2857  */
2858 static void
2859 ice_if_stop(if_ctx_t ctx)
2860 {
2861 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2862 
2863 	ASSERT_CTX_LOCKED(sc);
2864 
2865 	/*
2866 	 * The iflib core may call IFDI_STOP prior to the first call to
2867 	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
2868 	 * don't have, and disable Tx queues which aren't yet configured.
2869 	 * Although it is likely these extra operations are harmless, they do
2870 	 * cause spurious warning messages to be displayed, which may confuse
2871 	 * users.
2872 	 *
2873 	 * To avoid these messages, we use a state bit indicating if we've
2874 	 * been initialized. It will be set when ice_if_init is called, and
2875 	 * cleared here in ice_if_stop.
2876 	 */
2877 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
2878 		return;
2879 
2880 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
2881 		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
2882 		return;
2883 	}
2884 
2885 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
2886 		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
2887 		return;
2888 	}
2889 
2890 	ice_rdma_pf_stop(sc);
2891 
2892 	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
2893 	 * return of these functions because there's nothing we can really do
2894 	 * if they fail, and the functions already print error messages.
2895 	 * Just try to shut down as much as we can.
2896 	 */
2897 	ice_rm_pf_default_mac_filters(sc);
2898 
2899 	/* Dissociate the Tx and Rx queues from the interrupts */
2900 	ice_flush_txq_interrupts(&sc->pf_vsi);
2901 	ice_flush_rxq_interrupts(&sc->pf_vsi);
2902 
2903 	/* Disable the Tx and Rx queues */
2904 	ice_vsi_disable_tx(&sc->pf_vsi);
2905 	ice_control_rx_queues(&sc->pf_vsi, false);
2906 }
2907 
2908 /**
2909  * ice_if_get_counter - Get current value of an ifnet statistic
2910  * @ctx: iflib context pointer
2911  * @counter: ifnet counter to read
2912  *
2913  * Reads the current value of an ifnet counter for the device.
2914  *
2915  * This function is not protected by the iflib CTX lock.
2916  */
2917 static uint64_t
2918 ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
2919 {
2920 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2921 
2922 	/* Return the counter for the main PF VSI */
2923 	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
2924 }
2925 
2926 /**
2927  * ice_request_stack_reinit - Request that iflib re-initialize
2928  * @sc: the device private softc
2929  *
2930  * Request that the device be brought down and up, to re-initialize. For
2931  * example, this may be called when a device reset occurs, or when Tx and Rx
2932  * queues need to be re-initialized.
2933  *
2934  * This is required because the iflib state is outside the driver, and must be
2935  * re-initialized if we need to resart Tx and Rx queues.
2936  */
2937 void
2938 ice_request_stack_reinit(struct ice_softc *sc)
2939 {
2940 	if (CTX_ACTIVE(sc->ctx)) {
2941 		iflib_request_reset(sc->ctx);
2942 		iflib_admin_intr_deferred(sc->ctx);
2943 	}
2944 }
2945 
2946 /**
2947  * ice_driver_is_detaching - Check if the driver is detaching/unloading
2948  * @sc: device private softc
2949  *
2950  * Returns true if the driver is detaching, false otherwise.
2951  *
2952  * @remark on newer kernels, take advantage of iflib_in_detach in order to
2953  * report detachment correctly as early as possible.
2954  *
2955  * @remark this function is used by various code paths that want to avoid
2956  * running if the driver is about to be removed. This includes sysctls and
2957  * other driver access points. Note that it does not fully resolve
2958  * detach-based race conditions as it is possible for a thread to race with
2959  * iflib_in_detach.
2960  */
2961 bool
2962 ice_driver_is_detaching(struct ice_softc *sc)
2963 {
2964 	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
2965 		iflib_in_detach(sc->ctx));
2966 }
2967 
2968 /**
2969  * ice_if_priv_ioctl - Device private ioctl handler
2970  * @ctx: iflib context pointer
2971  * @command: The ioctl command issued
2972  * @data: ioctl specific data
2973  *
2974  * iflib callback for handling custom driver specific ioctls.
2975  *
2976  * @pre Assumes that the iflib context lock is held.
2977  */
2978 static int
2979 ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
2980 {
2981 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2982 	struct ifdrv *ifd;
2983 	device_t dev = sc->dev;
2984 
2985 	if (data == NULL)
2986 		return (EINVAL);
2987 
2988 	ASSERT_CTX_LOCKED(sc);
2989 
2990 	/* Make sure the command type is valid */
2991 	switch (command) {
2992 	case SIOCSDRVSPEC:
2993 	case SIOCGDRVSPEC:
2994 		/* Accepted commands */
2995 		break;
2996 	case SIOCGPRIVATE_0:
2997 		/*
2998 		 * Although we do not support this ioctl command, it's
2999 		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
3000 		 * handler. Do not print a message in this case
3001 		 */
3002 		return (ENOTSUP);
3003 	default:
3004 		/*
3005 		 * If we get a different command for this function, it's
3006 		 * definitely unexpected, so log a message indicating what
3007 		 * command we got for debugging purposes.
3008 		 */
3009 		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
3010 			      __func__, command);
3011 		return (EINVAL);
3012 	}
3013 
3014 	ifd = (struct ifdrv *)data;
3015 
3016 	switch (ifd->ifd_cmd) {
3017 	case ICE_NVM_ACCESS:
3018 		return ice_handle_nvm_access_ioctl(sc, ifd);
3019 	case ICE_DEBUG_DUMP:
3020 		return ice_handle_debug_dump_ioctl(sc, ifd);
3021 	default:
3022 		return EINVAL;
3023 	}
3024 }
3025 
3026 /**
3027  * ice_if_i2c_req - I2C request handler for iflib
3028  * @ctx: iflib context pointer
3029  * @req: The I2C parameters to use
3030  *
3031  * Read from the port's I2C eeprom using the parameters from the ioctl.
3032  *
3033  * @remark The iflib-only part is pretty simple.
3034  */
3035 static int
3036 ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
3037 {
3038 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3039 
3040 	return ice_handle_i2c_req(sc, req);
3041 }
3042 
3043 /**
3044  * ice_if_suspend - PCI device suspend handler for iflib
3045  * @ctx: iflib context pointer
3046  *
3047  * Deinitializes the driver and clears HW resources in preparation for
3048  * suspend or an FLR.
3049  *
3050  * @returns 0; this return value is ignored
3051  */
3052 static int
3053 ice_if_suspend(if_ctx_t ctx)
3054 {
3055 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3056 
3057 	/* At least a PFR is always going to happen after this;
3058 	 * either via FLR or during the D3->D0 transition.
3059 	 */
3060 	ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
3061 
3062 	ice_prepare_for_reset(sc);
3063 
3064 	return (0);
3065 }
3066 
3067 /**
3068  * ice_if_resume - PCI device resume handler for iflib
3069  * @ctx: iflib context pointer
3070  *
3071  * Reinitializes the driver and the HW after PCI resume or after
3072  * an FLR. An init is performed by iflib after this function is finished.
3073  *
3074  * @returns 0; this return value is ignored
3075  */
3076 static int
3077 ice_if_resume(if_ctx_t ctx)
3078 {
3079 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3080 
3081 	ice_rebuild(sc);
3082 
3083 	return (0);
3084 }
3085 
3086