xref: /freebsd/sys/dev/ice/if_ice_iflib.c (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2021, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 /*$FreeBSD$*/
32 
33 /**
34  * @file if_ice_iflib.c
35  * @brief iflib driver implementation
36  *
37  * Contains the main entry point for the iflib driver implementation. It
38  * implements the various ifdi driver methods, and sets up the module and
39  * driver values to load an iflib driver.
40  */
41 
42 #include "ice_iflib.h"
43 #include "ice_drv_info.h"
44 #include "ice_switch.h"
45 #include "ice_sched.h"
46 
47 #include <sys/module.h>
48 #include <sys/sockio.h>
49 #include <sys/smp.h>
50 #include <dev/pci/pcivar.h>
51 #include <dev/pci/pcireg.h>
52 
53 /*
54  * Device method prototypes
 *
 * ice_register() is installed as the device_register method in ice_methods.
 * The ice_if_*() functions are installed as the ifdi_* methods in
 * ice_iflib_methods. ice_attach_pre_recovery_mode() and
 * ice_attach_post_recovery_mode() are the reduced attach paths taken by
 * ice_if_attach_pre() and ice_if_attach_post() when the driver is in
 * firmware recovery mode.
55  */
56 
57 static void *ice_register(device_t);
58 static int  ice_if_attach_pre(if_ctx_t);
59 static int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
60 static int  ice_if_attach_post(if_ctx_t);
61 static void ice_attach_post_recovery_mode(struct ice_softc *sc);
62 static int  ice_if_detach(if_ctx_t);
63 static int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
64 static int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets);
65 static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
66 static void ice_if_queues_free(if_ctx_t ctx);
67 static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
68 static void ice_if_intr_enable(if_ctx_t ctx);
69 static void ice_if_intr_disable(if_ctx_t ctx);
70 static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
71 static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
72 static int ice_if_promisc_set(if_ctx_t ctx, int flags);
73 static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
74 static int ice_if_media_change(if_ctx_t ctx);
75 static void ice_if_init(if_ctx_t ctx);
76 static void ice_if_timer(if_ctx_t ctx, uint16_t qid);
77 static void ice_if_update_admin_status(if_ctx_t ctx);
78 static void ice_if_multi_set(if_ctx_t ctx);
79 static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
80 static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
81 static void ice_if_stop(if_ctx_t ctx);
82 static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
83 static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
84 static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
85 static int ice_if_suspend(if_ctx_t ctx);
86 static int ice_if_resume(if_ctx_t ctx);
87 
/*
 * NOTE(review): judging by their names these are the MSI-X queue and
 * administrative interrupt handlers; they are not referenced by either
 * method table in this file section -- confirm against their definitions.
 */
88 static int ice_msix_que(void *arg);
89 static int ice_msix_admin(void *arg);
90 
91 /*
92  * Helper function prototypes
 *
 * File-local (static) helpers used by the device methods above.
 * ice_admin_timer() is the callback armed with callout_reset() on
 * sc->admin_timer by the attach_post paths.
93  */
94 static int ice_pci_mapping(struct ice_softc *sc);
95 static void ice_free_pci_mapping(struct ice_softc *sc);
96 static void ice_update_link_status(struct ice_softc *sc, bool update_media);
97 static void ice_init_device_features(struct ice_softc *sc);
98 static void ice_init_tx_tracking(struct ice_vsi *vsi);
99 static void ice_handle_reset_event(struct ice_softc *sc);
100 static void ice_handle_pf_reset_request(struct ice_softc *sc);
101 static void ice_prepare_for_reset(struct ice_softc *sc);
102 static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
103 static void ice_rebuild(struct ice_softc *sc);
104 static void ice_rebuild_recovery_mode(struct ice_softc *sc);
105 static void ice_free_irqvs(struct ice_softc *sc);
106 static void ice_update_rx_mbuf_sz(struct ice_softc *sc);
107 static void ice_poll_for_media_avail(struct ice_softc *sc);
108 static void ice_setup_scctx(struct ice_softc *sc);
109 static int ice_allocate_msix(struct ice_softc *sc);
110 static void ice_admin_timer(void *arg);
111 static void ice_transition_recovery_mode(struct ice_softc *sc);
112 static void ice_transition_safe_mode(struct ice_softc *sc);
113 
114 /*
115  * Device Interface Declaration
116  */
117 
118 /**
119  * @var ice_methods
120  * @brief ice driver method entry points
121  *
122  * List of device methods implementing the generic device interface used by
123  * the device stack to interact with the ice driver. Since this is an iflib
124  * driver, most of the methods point to the generic iflib implementation.
 *
 * The only driver-specific entry is device_register, which maps to
 * ice_register() and hands the shared context (ice_sctx) back to the caller.
 * This table is referenced by ice_driver.methods.
125  */
126 static device_method_t ice_methods[] = {
127 	/* Device interface */
128 	DEVMETHOD(device_register, ice_register),
129 	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
130 	DEVMETHOD(device_attach,   iflib_device_attach),
131 	DEVMETHOD(device_detach,   iflib_device_detach),
132 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
133 	DEVMETHOD(device_suspend,  iflib_device_suspend),
134 	DEVMETHOD(device_resume,   iflib_device_resume),
	/* Table terminator */
135 	DEVMETHOD_END
136 };
137 
138 /**
139  * @var ice_iflib_methods
140  * @brief iflib method entry points
141  *
142  * List of device methods used by the iflib stack to interact with this
143  * driver. These are the real main entry points used to interact with this
144  * driver.
 *
 * Each ifdi_* method is bound to the matching ice_if_*() function declared
 * in the prototype section of this file. The table is referenced by
 * ice_iflib_driver.methods.
145  */
146 static device_method_t ice_iflib_methods[] = {
	/* Attach / detach */
147 	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
148 	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
149 	DEVMETHOD(ifdi_detach, ice_if_detach),
	/* Queue memory and interrupt vector setup / teardown */
150 	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
151 	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
152 	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
153 	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
	/* Runtime configuration and interrupt control */
154 	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
155 	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
156 	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
157 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
158 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
159 	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
160 	DEVMETHOD(ifdi_media_status, ice_if_media_status),
161 	DEVMETHOD(ifdi_media_change, ice_if_media_change),
162 	DEVMETHOD(ifdi_init, ice_if_init),
163 	DEVMETHOD(ifdi_stop, ice_if_stop),
164 	DEVMETHOD(ifdi_timer, ice_if_timer),
165 	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
166 	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
167 	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
168 	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
169 	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
170 	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
171 	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
	/* Power management */
172 	DEVMETHOD(ifdi_suspend, ice_if_suspend),
173 	DEVMETHOD(ifdi_resume, ice_if_resume),
	/* Table terminator */
174 	DEVMETHOD_END
175 };
176 
177 /**
178  * @var ice_driver
179  * @brief driver structure for the generic device stack
180  *
181  * driver_t definition used to setup the generic device methods.
 *
 * Binds the name "ice" to the ice_methods table and reports
 * sizeof(struct ice_softc) as the driver's size. This is the driver_t
 * passed to DRIVER_MODULE() later in this file.
182  */
183 static driver_t ice_driver = {
184 	.name = "ice",
185 	.methods = ice_methods,
186 	.size = sizeof(struct ice_softc),
187 };
188 
189 /**
190  * @var ice_iflib_driver
191  * @brief driver structure for the iflib stack
192  *
193  * driver_t definition used to setup the iflib device methods.
 *
 * Same name and size as ice_driver, but bound to ice_iflib_methods.
 * iflib reaches it through ice_sctx.isc_driver.
194  */
195 static driver_t ice_iflib_driver = {
196 	.name = "ice",
197 	.methods = ice_iflib_methods,
198 	.size = sizeof(struct ice_softc),
199 };
200 
/*
 * Tx/Rx operation tables defined outside this file. ice_setup_scctx()
 * installs ice_recovery_txrx as scctx->isc_txrx when the driver is in
 * recovery mode, and ice_txrx otherwise.
 */
201 extern struct if_txrx ice_txrx;
202 extern struct if_txrx ice_recovery_txrx;
203 
204 /**
205  * @var ice_sctx
206  * @brief ice driver shared context
207  *
208  * Structure defining shared values (context) that is used by all instances of
209  * the device. Primarily used to setup details about how the iflib stack
210  * should treat this driver. Also defines the default, minimum, and maximum
211  * number of descriptors in each ring.
 *
 * A pointer to this structure is what ice_register() returns. Per-device
 * values (queue set counts, ring sizes, capabilities) are not set here; they
 * are filled into the if_softc_ctx_t by ice_setup_scctx().
212  */
213 static struct if_shared_ctx ice_sctx = {
214 	.isc_magic = IFLIB_MAGIC,
	/* NOTE(review): presumably the alignment iflib applies to queue
	 * memory -- confirm against the iflib documentation.
	 */
215 	.isc_q_align = PAGE_SIZE,
216 
217 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
218 	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
219 	 * that doesn't make sense since that would be larger than the maximum
220 	 * size of a single packet.
221 	 */
222 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
223 
224 	/* XXX: This is only used by iflib to ensure that
225 	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
226 	 */
227 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
228 	/* XXX: This is used by iflib to set the number of segments in the TSO
229 	 * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the
230 	 * related ifnet parameter.
231 	 */
232 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
233 
234 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
235 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
236 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
237 
	/* One free list and one Tx/Rx queue per queue set; the queue
	 * allocation callback asserts ntxqs == 1.
	 */
238 	.isc_nfl = 1,
239 	.isc_ntxqs = 1,
240 	.isc_nrxqs = 1,
241 
	/* A single interrupt is set aside for the administrative interrupt */
242 	.isc_admin_intrcnt = 1,
243 	.isc_vendor_info = ice_vendor_info_array,
244 	.isc_driver_version = __DECONST(char *, ice_driver_version),
245 	.isc_driver = &ice_iflib_driver,
246 
247 	/*
248 	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
249 	 * for hardware checksum offload
250 	 *
251 	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
252 	 * IP sum field, required by our hardware to calculate valid TSO
253 	 * checksums.
254 	 *
255 	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
256 	 * even when the interface is down.
257 	 *
258 	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
259 	 * vectors manually instead of relying on iflib code to do this.
260 	 */
261 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
262 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,
263 
	/* Descriptor count limits and defaults; only index 0 is populated
	 * because there is one ring per queue set.
	 */
264 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
265 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
266 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
267 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
268 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
269 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
270 };
271 
/*
 * Module registration: attach ice_driver to the pci bus under the module
 * name "ice", with ice_module_event_handler as the module event handler.
 */
272 DRIVER_MODULE(ice, pci, ice_driver, ice_module_event_handler, NULL);
273 
/* Module version 1; requires version 1 of the pci, ether and iflib modules */
274 MODULE_VERSION(ice, 1);
275 MODULE_DEPEND(ice, pci, 1, 1, 1);
276 MODULE_DEPEND(ice, ether, 1, 1, 1);
277 MODULE_DEPEND(ice, iflib, 1, 1, 1);
278 
/* Export PNP match data built from the same table used as isc_vendor_info */
279 IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);
280 
281 /* Static driver-wide sysctls */
282 #include "ice_iflib_sysctls.h"
283 
284 /**
285  * ice_pci_mapping - Map PCI BAR memory
286  * @sc: device private softc
287  *
288  * Map PCI BAR 0 for device operation.
289  */
290 static int
291 ice_pci_mapping(struct ice_softc *sc)
292 {
293 	int rc;
294 
295 	/* Map BAR0 */
296 	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
297 	if (rc)
298 		return rc;
299 
300 	return 0;
301 }
302 
303 /**
304  * ice_free_pci_mapping - Release PCI BAR memory
305  * @sc: device private softc
306  *
307  * Release PCI BARs which were previously mapped by ice_pci_mapping().
308  */
309 static void
310 ice_free_pci_mapping(struct ice_softc *sc)
311 {
312 	/* Free BAR0 */
313 	ice_free_bar(sc->dev, &sc->bar0);
314 }
315 
316 /*
317  * Device methods
318  */
319 
320 /**
321  * ice_register - register device method callback
322  * @dev: the device being registered
323  *
324  * Returns a pointer to the shared context structure, which is used by iflib.
325  */
326 static void *
327 ice_register(device_t dev __unused)
328 {
329 	return &ice_sctx;
330 } /* ice_register */
331 
332 /**
333  * ice_setup_scctx - Setup the iflib softc context structure
334  * @sc: the device private structure
335  *
336  * Setup the parameters in if_softc_ctx_t structure used by the iflib stack
337  * when loading.
338  */
339 static void
340 ice_setup_scctx(struct ice_softc *sc)
341 {
342 	if_softc_ctx_t scctx = sc->scctx;
343 	struct ice_hw *hw = &sc->hw;
344 	bool safe_mode, recovery_mode;
345 
346 	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
347 	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);
348 
349 	/*
350 	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
351 	 * a single queue pair.
352 	 */
353 	if (safe_mode || recovery_mode) {
354 		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
355 		scctx->isc_ntxqsets_max = 1;
356 		scctx->isc_nrxqsets_max = 1;
357 	} else {
358 		/*
359 		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
360 		 * the values of the override sysctls. Cache these initial
361 		 * values so that the driver can be aware of what the iflib
362 		 * sysctl value is when setting up MSI-X vectors.
363 		 */
364 		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
365 		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;
366 
367 		if (scctx->isc_ntxqsets == 0)
368 			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
369 		if (scctx->isc_nrxqsets == 0)
370 			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;
371 
372 		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
373 		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;
374 
375 		/*
376 		 * Sanity check that the iflib sysctl values are within the
377 		 * maximum supported range.
378 		 */
379 		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
380 			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
381 		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
382 			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
383 	}
384 
385 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
386 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
387 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
388 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
389 
390 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
391 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
392 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
393 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
394 
395 	scctx->isc_msix_bar = PCIR_BAR(ICE_MSIX_BAR);
396 	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;
397 
398 	/*
399 	 * If the driver loads in recovery mode, disable Tx/Rx functionality
400 	 */
401 	if (recovery_mode)
402 		scctx->isc_txrx = &ice_recovery_txrx;
403 	else
404 		scctx->isc_txrx = &ice_txrx;
405 
406 	/*
407 	 * If the driver loads in Safe mode or Recovery mode, disable
408 	 * advanced features including hardware offloads.
409 	 */
410 	if (safe_mode || recovery_mode) {
411 		scctx->isc_capenable = ICE_SAFE_CAPS;
412 		scctx->isc_tx_csum_flags = 0;
413 	} else {
414 		scctx->isc_capenable = ICE_FULL_CAPS;
415 		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
416 	}
417 
418 	scctx->isc_capabilities = scctx->isc_capenable;
419 } /* ice_setup_scctx */
420 
421 /**
422  * ice_if_attach_pre - Early device attach logic
423  * @ctx: the iflib context structure
424  *
425  * Called by iflib during the attach process. Earliest main driver entry
426  * point which performs necessary hardware and driver initialization. Called
427  * before the Tx and Rx queues are allocated.
428  */
429 static int
430 ice_if_attach_pre(if_ctx_t ctx)
431 {
432 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
433 	enum ice_fw_modes fw_mode;
434 	enum ice_status status;
435 	if_softc_ctx_t scctx;
436 	struct ice_hw *hw;
437 	device_t dev;
438 	int err;
439 
440 	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");
441 
442 	ice_set_state(&sc->state, ICE_STATE_ATTACHING);
443 
444 	sc->ctx = ctx;
445 	sc->media = iflib_get_media(ctx);
446 	sc->sctx = iflib_get_sctx(ctx);
447 	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);
448 
449 	dev = sc->dev = iflib_get_dev(ctx);
450 	scctx = sc->scctx = iflib_get_softc_ctx(ctx);
451 
452 	hw = &sc->hw;
453 	hw->back = sc;
454 
455 	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
456 		 "%s:admin", device_get_nameunit(dev));
457 	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
458 	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);
459 
460 	ASSERT_CTX_LOCKED(sc);
461 
462 	if (ice_pci_mapping(sc)) {
463 		err = (ENXIO);
464 		goto destroy_admin_timer;
465 	}
466 
467 	/* Save off the PCI information */
468 	ice_save_pci_info(hw, dev);
469 
470 	/* create tunables as early as possible */
471 	ice_add_device_tunables(sc);
472 
473 	/* Setup ControlQ lengths */
474 	ice_set_ctrlq_len(hw);
475 
476 	fw_mode = ice_get_fw_mode(hw);
477 	if (fw_mode == ICE_FW_MODE_REC) {
478 		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
479 
480 		err = ice_attach_pre_recovery_mode(sc);
481 		if (err)
482 			goto free_pci_mapping;
483 
484 		return (0);
485 	}
486 
487 	/* Initialize the hw data structure */
488 	status = ice_init_hw(hw);
489 	if (status) {
490 		if (status == ICE_ERR_FW_API_VER) {
491 			/* Enter recovery mode, so that the driver remains
492 			 * loaded. This way, if the system administrator
493 			 * cannot update the driver, they may still attempt to
494 			 * downgrade the NVM.
495 			 */
496 			err = ice_attach_pre_recovery_mode(sc);
497 			if (err)
498 				goto free_pci_mapping;
499 
500 			return (0);
501 		} else {
502 			err = EIO;
503 			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
504 				      ice_status_str(status),
505 				      ice_aq_str(hw->adminq.sq_last_status));
506 		}
507 		goto free_pci_mapping;
508 	}
509 
510 	/* Notify firmware of the device driver version */
511 	err = ice_send_version(sc);
512 	if (err)
513 		goto deinit_hw;
514 
515 	ice_load_pkg_file(sc);
516 
517 	err = ice_init_link_events(sc);
518 	if (err) {
519 		device_printf(dev, "ice_init_link_events failed: %s\n",
520 			      ice_err_str(err));
521 		goto deinit_hw;
522 	}
523 
524 	ice_print_nvm_version(sc);
525 
526 	ice_init_device_features(sc);
527 
528 	/* Setup the MAC address */
529 	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);
530 
531 	/* Setup the iflib softc context structure */
532 	ice_setup_scctx(sc);
533 
534 	/* Initialize the Tx queue manager */
535 	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
536 	if (err) {
537 		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
538 			      ice_err_str(err));
539 		goto deinit_hw;
540 	}
541 
542 	/* Initialize the Rx queue manager */
543 	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
544 	if (err) {
545 		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
546 			      ice_err_str(err));
547 		goto free_tx_qmgr;
548 	}
549 
550 	/* Initialize the interrupt resource manager */
551 	err = ice_alloc_intr_tracking(sc);
552 	if (err)
553 		/* Errors are already printed */
554 		goto free_rx_qmgr;
555 
556 	/* Determine maximum number of VSIs we'll prepare for */
557 	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
558 				    hw->func_caps.guar_num_vsi);
559 
560 	if (!sc->num_available_vsi) {
561 		err = EIO;
562 		device_printf(dev, "No VSIs allocated to host\n");
563 		goto free_intr_tracking;
564 	}
565 
566 	/* Allocate storage for the VSI pointers */
567 	sc->all_vsi = (struct ice_vsi **)
568 		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
569 		       M_ICE, M_WAITOK | M_ZERO);
570 	if (!sc->all_vsi) {
571 		err = ENOMEM;
572 		device_printf(dev, "Unable to allocate VSI array\n");
573 		goto free_intr_tracking;
574 	}
575 
576 	/*
577 	 * Prepare the statically allocated primary PF VSI in the softc
578 	 * structure. Other VSIs will be dynamically allocated as needed.
579 	 */
580 	ice_setup_pf_vsi(sc);
581 
582 	err = ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
583 	    scctx->isc_nrxqsets_max);
584 	if (err) {
585 		device_printf(dev, "Unable to allocate VSI Queue maps\n");
586 		goto free_main_vsi;
587 	}
588 
589 	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
590 	err = ice_allocate_msix(sc);
591 	if (err)
592 		goto free_main_vsi;
593 
594 	return 0;
595 
596 free_main_vsi:
597 	/* ice_release_vsi will free the queue maps if they were allocated */
598 	ice_release_vsi(&sc->pf_vsi);
599 	free(sc->all_vsi, M_ICE);
600 	sc->all_vsi = NULL;
601 free_intr_tracking:
602 	ice_free_intr_tracking(sc);
603 free_rx_qmgr:
604 	ice_resmgr_destroy(&sc->rx_qmgr);
605 free_tx_qmgr:
606 	ice_resmgr_destroy(&sc->tx_qmgr);
607 deinit_hw:
608 	ice_deinit_hw(hw);
609 free_pci_mapping:
610 	ice_free_pci_mapping(sc);
611 destroy_admin_timer:
612 	mtx_lock(&sc->admin_mtx);
613 	callout_stop(&sc->admin_timer);
614 	mtx_unlock(&sc->admin_mtx);
615 	mtx_destroy(&sc->admin_mtx);
616 	return err;
617 } /* ice_if_attach_pre */
618 
619 /**
620  * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
621  * @sc: the device private softc
622  *
623  * Loads the device driver in limited Firmware Recovery mode, intended to
624  * allow users to update the firmware to attempt to recover the device.
625  *
626  * @remark We may enter recovery mode in case either (a) the firmware is
627  * detected to be in an invalid state and must be re-programmed, or (b) the
628  * driver detects that the loaded firmware has a non-compatible API version
629  * that the driver cannot operate with.
630  */
631 static int
632 ice_attach_pre_recovery_mode(struct ice_softc *sc)
633 {
634 	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);
635 
636 	/* Setup the iflib softc context */
637 	ice_setup_scctx(sc);
638 
639 	/* Setup the PF VSI back pointer */
640 	sc->pf_vsi.sc = sc;
641 
642 	/*
643 	 * We still need to allocate MSI-X vectors since we need one vector to
644 	 * run the administrative admin interrupt
645 	 */
646 	return ice_allocate_msix(sc);
647 }
648 
649 /**
650  * ice_update_link_status - notify OS of link state change
651  * @sc: device private softc structure
652  * @update_media: true if we should update media even if link didn't change
653  *
654  * Called to notify iflib core of link status changes. Should be called once
655  * during attach_post, and whenever link status changes during runtime.
656  *
657  * This call only updates the currently supported media types if the link
658  * status changed, or if update_media is set to true.
659  */
660 static void
661 ice_update_link_status(struct ice_softc *sc, bool update_media)
662 {
663 	struct ice_hw *hw = &sc->hw;
664 	enum ice_status status;
665 
666 	/* Never report link up when in recovery mode */
667 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
668 		return;
669 
670 	/* Report link status to iflib only once each time it changes */
671 	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
672 		if (sc->link_up) { /* link is up */
673 			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);
674 
675 			ice_set_default_local_lldp_mib(sc);
676 
677 			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
678 
679 			ice_link_up_msg(sc);
680 
681 			update_media = true;
682 		} else { /* link is down */
683 			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
684 
685 			update_media = true;
686 		}
687 	}
688 
689 	/* Update the supported media types */
690 	if (update_media) {
691 		status = ice_add_media_types(sc, sc->media);
692 		if (status)
693 			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
694 				      ice_status_str(status),
695 				      ice_aq_str(hw->adminq.sq_last_status));
696 	}
697 
698 	/* TODO: notify VFs of link state change */
699 }
700 
701 /**
702  * ice_if_attach_post - Late device attach logic
703  * @ctx: the iflib context structure
704  *
705  * Called by iflib to finish up attaching the device. Performs any attach
706  * logic which must wait until after the Tx and Rx queues have been
707  * allocated.
708  */
709 static int
710 ice_if_attach_post(if_ctx_t ctx)
711 {
712 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
713 	if_t ifp = iflib_get_ifp(ctx);
714 	int err;
715 
716 	ASSERT_CTX_LOCKED(sc);
717 
718 	/* We don't yet support loading if MSI-X is not supported */
719 	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
720 		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
721 		return (ENOTSUP);
722 	}
723 
724 	/* The ifnet structure hasn't yet been initialized when the attach_pre
725 	 * handler is called, so wait until attach_post to setup the
726 	 * isc_max_frame_size.
727 	 */
728 
729 	sc->ifp = ifp;
730 	sc->scctx->isc_max_frame_size = ifp->if_mtu +
731 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
732 
733 	/*
734 	 * If we are in recovery mode, only perform a limited subset of
735 	 * initialization to support NVM recovery.
736 	 */
737 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
738 		ice_attach_post_recovery_mode(sc);
739 		return (0);
740 	}
741 
742 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
743 
744 	err = ice_initialize_vsi(&sc->pf_vsi);
745 	if (err) {
746 		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
747 			      ice_err_str(err));
748 		return err;
749 	}
750 
751 	/* Enable FW health event reporting */
752 	ice_init_health_events(sc);
753 
754 	/* Configure the main PF VSI for RSS */
755 	err = ice_config_rss(&sc->pf_vsi);
756 	if (err) {
757 		device_printf(sc->dev,
758 			      "Unable to configure RSS for the main VSI, err %s\n",
759 			      ice_err_str(err));
760 		return err;
761 	}
762 
763 	/* Configure switch to drop transmitted LLDP and PAUSE frames */
764 	err = ice_cfg_pf_ethertype_filters(sc);
765 	if (err)
766 		return err;
767 
768 	ice_get_and_print_bus_info(sc);
769 
770 	ice_set_link_management_mode(sc);
771 
772 	ice_init_saved_phy_cfg(sc);
773 
774 	ice_cfg_pba_num(sc);
775 
776 	ice_add_device_sysctls(sc);
777 
778 	/* Get DCBX/LLDP state and start DCBX agent */
779 	ice_init_dcb_setup(sc);
780 
781 	/* Setup link configuration parameters */
782 	ice_init_link_configuration(sc);
783 	ice_update_link_status(sc, true);
784 
785 	/* Configure interrupt causes for the administrative interrupt */
786 	ice_configure_misc_interrupts(sc);
787 
788 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
789 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
790 
791 	/* Start the admin timer */
792 	mtx_lock(&sc->admin_mtx);
793 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
794 	mtx_unlock(&sc->admin_mtx);
795 
796 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
797 
798 	return 0;
799 } /* ice_if_attach_post */
800 
801 /**
802  * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
803  * @sc: the device private softc
804  *
805  * Performs minimal work to prepare the driver to recover an NVM in case the
806  * firmware is in recovery mode.
807  */
808 static void
809 ice_attach_post_recovery_mode(struct ice_softc *sc)
810 {
811 	/* Configure interrupt causes for the administrative interrupt */
812 	ice_configure_misc_interrupts(sc);
813 
814 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
815 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
816 
817 	/* Start the admin timer */
818 	mtx_lock(&sc->admin_mtx);
819 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
820 	mtx_unlock(&sc->admin_mtx);
821 
822 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
823 }
824 
825 /**
826  * ice_free_irqvs - Free IRQ vector memory
827  * @sc: the device private softc structure
828  *
829  * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
830  */
831 static void
832 ice_free_irqvs(struct ice_softc *sc)
833 {
834 	struct ice_vsi *vsi = &sc->pf_vsi;
835 	if_ctx_t ctx = sc->ctx;
836 	int i;
837 
838 	/* If the irqvs array is NULL, then there are no vectors to free */
839 	if (sc->irqvs == NULL)
840 		return;
841 
842 	/* Free the IRQ vectors */
843 	for (i = 0; i < sc->num_irq_vectors; i++)
844 		iflib_irq_free(ctx, &sc->irqvs[i].irq);
845 
846 	/* Clear the irqv pointers */
847 	for (i = 0; i < vsi->num_rx_queues; i++)
848 		vsi->rx_queues[i].irqv = NULL;
849 
850 	for (i = 0; i < vsi->num_tx_queues; i++)
851 		vsi->tx_queues[i].irqv = NULL;
852 
853 	/* Release the vector array memory */
854 	free(sc->irqvs, M_ICE);
855 	sc->irqvs = NULL;
856 	sc->num_irq_vectors = 0;
857 }
858 
859 /**
860  * ice_if_detach - Device driver detach logic
861  * @ctx: iflib context structure
862  *
863  * Perform device shutdown logic to detach the device driver.
864  *
865  * Note that there is no guarantee of the ordering of ice_if_queues_free() and
866  * ice_if_detach(). It is possible for the functions to be called in either
867  * order, and they must not assume to have a strict ordering.
868  */
869 static int
870 ice_if_detach(if_ctx_t ctx)
871 {
872 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
873 	struct ice_vsi *vsi = &sc->pf_vsi;
874 	int i;
875 
876 	ASSERT_CTX_LOCKED(sc);
877 
878 	/* Indicate that we're detaching */
879 	ice_set_state(&sc->state, ICE_STATE_DETACHING);
880 
881 	/* Stop the admin timer */
882 	mtx_lock(&sc->admin_mtx);
883 	callout_stop(&sc->admin_timer);
884 	mtx_unlock(&sc->admin_mtx);
885 	mtx_destroy(&sc->admin_mtx);
886 
887 	/* Free allocated media types */
888 	ifmedia_removeall(sc->media);
889 
890 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
891 	 * pointers. Note, the calls here and those in ice_if_queues_free()
892 	 * are *BOTH* necessary, as we cannot guarantee which path will be
893 	 * run first
894 	 */
895 	ice_vsi_del_txqs_ctx(vsi);
896 	ice_vsi_del_rxqs_ctx(vsi);
897 
898 	/* Release MSI-X resources */
899 	ice_free_irqvs(sc);
900 
901 	for (i = 0; i < sc->num_available_vsi; i++) {
902 		if (sc->all_vsi[i])
903 			ice_release_vsi(sc->all_vsi[i]);
904 	}
905 
906 	if (sc->all_vsi) {
907 		free(sc->all_vsi, M_ICE);
908 		sc->all_vsi = NULL;
909 	}
910 
911 	/* Release MSI-X memory */
912 	pci_release_msi(sc->dev);
913 
914 	if (sc->msix_table != NULL) {
915 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
916 				     rman_get_rid(sc->msix_table),
917 				     sc->msix_table);
918 		sc->msix_table = NULL;
919 	}
920 
921 	ice_free_intr_tracking(sc);
922 
923 	/* Destroy the queue managers */
924 	ice_resmgr_destroy(&sc->tx_qmgr);
925 	ice_resmgr_destroy(&sc->rx_qmgr);
926 
927 	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
928 		ice_deinit_hw(&sc->hw);
929 
930 	ice_free_pci_mapping(sc);
931 
932 	return 0;
933 } /* ice_if_detach */
934 
935 /**
936  * ice_if_tx_queues_alloc - Allocate Tx queue memory
937  * @ctx: iflib context structure
938  * @vaddrs: virtual addresses for the queue memory
939  * @paddrs: physical addresses for the queue memory
940  * @ntxqs: the number of Tx queues per set (should always be 1)
941  * @ntxqsets: the number of Tx queue sets to allocate
942  *
943  * Called by iflib to allocate Tx queues for the device. Allocates driver
944  * memory to track each queue, the status arrays used for descriptor
945  * status reporting, and Tx queue sysctls.
946  */
947 static int
948 ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
949 		       int __invariant_only ntxqs, int ntxqsets)
950 {
951 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
952 	struct ice_vsi *vsi = &sc->pf_vsi;
953 	struct ice_tx_queue *txq;
954 	int err, i, j;
955 
956 	MPASS(ntxqs == 1);
957 	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
958 	ASSERT_CTX_LOCKED(sc);
959 
960 	/* Do not bother allocating queues if we're in recovery mode */
961 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
962 		return (0);
963 
964 	/* Allocate queue structure memory */
965 	if (!(vsi->tx_queues =
966 	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_WAITOK | M_ZERO))) {
967 		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
968 		return (ENOMEM);
969 	}
970 
971 	/* Allocate report status arrays */
972 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
973 		if (!(txq->tx_rsq =
974 		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_WAITOK))) {
975 			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
976 			err = ENOMEM;
977 			goto free_tx_queues;
978 		}
979 		/* Initialize report status array */
980 		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
981 			txq->tx_rsq[j] = QIDX_INVALID;
982 	}
983 
984 	/* Assign queues from PF space to the main VSI */
985 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
986 	if (err) {
987 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
988 			      ice_err_str(err));
989 		goto free_tx_queues;
990 	}
991 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
992 
993 	/* Add Tx queue sysctls context */
994 	ice_vsi_add_txqs_ctx(vsi);
995 
996 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
997 		/* q_handle == me when only one TC */
998 		txq->me = txq->q_handle = i;
999 		txq->vsi = vsi;
1000 
1001 		/* store the queue size for easier access */
1002 		txq->desc_count = sc->scctx->isc_ntxd[0];
1003 
1004 		/* get the virtual and physical address of the hardware queues */
1005 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
1006 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
1007 		txq->tx_paddr = paddrs[i];
1008 
1009 		ice_add_txq_sysctls(txq);
1010 	}
1011 
1012 	vsi->num_tx_queues = ntxqsets;
1013 
1014 	return (0);
1015 
1016 free_tx_queues:
1017 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1018 		if (txq->tx_rsq != NULL) {
1019 			free(txq->tx_rsq, M_ICE);
1020 			txq->tx_rsq = NULL;
1021 		}
1022 	}
1023 	free(vsi->tx_queues, M_ICE);
1024 	vsi->tx_queues = NULL;
1025 	return err;
1026 }
1027 
1028 /**
1029  * ice_if_rx_queues_alloc - Allocate Rx queue memory
1030  * @ctx: iflib context structure
1031  * @vaddrs: virtual addresses for the queue memory
1032  * @paddrs: physical addresses for the queue memory
1033  * @nrxqs: number of Rx queues per set (should always be 1)
1034  * @nrxqsets: number of Rx queue sets to allocate
1035  *
1036  * Called by iflib to allocate Rx queues for the device. Allocates driver
1037  * memory to track each queue, as well as sets up the Rx queue sysctls.
1038  */
1039 static int
1040 ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1041 		       int __invariant_only nrxqs, int nrxqsets)
1042 {
1043 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1044 	struct ice_vsi *vsi = &sc->pf_vsi;
1045 	struct ice_rx_queue *rxq;
1046 	int err, i;
1047 
1048 	MPASS(nrxqs == 1);
1049 	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
1050 	ASSERT_CTX_LOCKED(sc);
1051 
1052 	/* Do not bother allocating queues if we're in recovery mode */
1053 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1054 		return (0);
1055 
1056 	/* Allocate queue structure memory */
1057 	if (!(vsi->rx_queues =
1058 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_WAITOK | M_ZERO))) {
1059 		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
1060 		return (ENOMEM);
1061 	}
1062 
1063 	/* Assign queues from PF space to the main VSI */
1064 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
1065 	if (err) {
1066 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1067 			      ice_err_str(err));
1068 		goto free_rx_queues;
1069 	}
1070 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1071 
1072 	/* Add Rx queue sysctls context */
1073 	ice_vsi_add_rxqs_ctx(vsi);
1074 
1075 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
1076 		rxq->me = i;
1077 		rxq->vsi = vsi;
1078 
1079 		/* store the queue size for easier access */
1080 		rxq->desc_count = sc->scctx->isc_nrxd[0];
1081 
1082 		/* get the virtual and physical address of the hardware queues */
1083 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
1084 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
1085 		rxq->rx_paddr = paddrs[i];
1086 
1087 		ice_add_rxq_sysctls(rxq);
1088 	}
1089 
1090 	vsi->num_rx_queues = nrxqsets;
1091 
1092 	return (0);
1093 
1094 free_rx_queues:
1095 	free(vsi->rx_queues, M_ICE);
1096 	vsi->rx_queues = NULL;
1097 	return err;
1098 }
1099 
1100 /**
1101  * ice_if_queues_free - Free queue memory
1102  * @ctx: the iflib context structure
1103  *
1104  * Free queue memory allocated by ice_if_tx_queues_alloc() and
1105  * ice_if_rx_queues_alloc().
1106  *
1107  * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
1108  * called in the same order. It's possible for ice_if_queues_free() to be
1109  * called prior to ice_if_detach(), and vice versa.
1110  *
1111  * For this reason, the main VSI is a static member of the ice_softc, which is
1112  * not free'd until after iflib finishes calling both of these functions.
1113  *
1114  * Thus, care must be taken in how we manage the memory being freed by this
1115  * function, and in what tasks it can and must perform.
1116  */
1117 static void
1118 ice_if_queues_free(if_ctx_t ctx)
1119 {
1120 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1121 	struct ice_vsi *vsi = &sc->pf_vsi;
1122 	struct ice_tx_queue *txq;
1123 	int i;
1124 
1125 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
1126 	 * pointers. Note, the calls here and those in ice_if_detach()
1127 	 * are *BOTH* necessary, as we cannot guarantee which path will be
1128 	 * run first
1129 	 */
1130 	ice_vsi_del_txqs_ctx(vsi);
1131 	ice_vsi_del_rxqs_ctx(vsi);
1132 
1133 	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
1134 	ice_free_irqvs(sc);
1135 
1136 	if (vsi->tx_queues != NULL) {
1137 		/* free the tx_rsq arrays */
1138 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1139 			if (txq->tx_rsq != NULL) {
1140 				free(txq->tx_rsq, M_ICE);
1141 				txq->tx_rsq = NULL;
1142 			}
1143 		}
1144 		free(vsi->tx_queues, M_ICE);
1145 		vsi->tx_queues = NULL;
1146 		vsi->num_tx_queues = 0;
1147 	}
1148 	if (vsi->rx_queues != NULL) {
1149 		free(vsi->rx_queues, M_ICE);
1150 		vsi->rx_queues = NULL;
1151 		vsi->num_rx_queues = 0;
1152 	}
1153 }
1154 
1155 /**
1156  * ice_msix_que - Fast interrupt handler for MSI-X receive queues
1157  * @arg: The Rx queue memory
1158  *
1159  * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
1160  * an MSI-X interrupt for a given queue is triggered. Currently this just asks
1161  * iflib to schedule the main Rx thread.
1162  */
1163 static int
1164 ice_msix_que(void *arg)
1165 {
1166 	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;
1167 
1168 	/* TODO: dynamic ITR algorithm?? */
1169 
1170 	return (FILTER_SCHEDULE_THREAD);
1171 }
1172 
1173 /**
1174  * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
1175  * @arg: pointer to device softc memory
1176  *
1177  * Called by iflib when an administrative interrupt occurs. Should perform any
1178  * fast logic for handling the interrupt cause, and then indicate whether the
1179  * admin task needs to be queued.
1180  */
1181 static int
1182 ice_msix_admin(void *arg)
1183 {
1184 	struct ice_softc *sc = (struct ice_softc *)arg;
1185 	struct ice_hw *hw = &sc->hw;
1186 	device_t dev = sc->dev;
1187 	u32 oicr;
1188 
1189 	/* There is no safe way to modify the enabled miscellaneous causes of
1190 	 * the OICR vector at runtime, as doing so would be prone to race
1191 	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
1192 	 * causes and allow future interrupts to occur. The admin interrupt
1193 	 * vector will not be re-enabled until after we exit this function,
1194 	 * but any delayed tasks must be resilient against possible "late
1195 	 * arrival" interrupts that occur while we're already handling the
1196 	 * task. This is done by using state bits and serializing these
1197 	 * delayed tasks via the admin status task function.
1198 	 */
1199 	oicr = rd32(hw, PFINT_OICR);
1200 
1201 	/* Processing multiple controlq interrupts on a single vector does not
1202 	 * provide an indication of which controlq triggered the interrupt.
1203 	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
1204 	 * registers. However, the INTEVENT bit is not guaranteed to be set as
1205 	 * it gets automatically cleared when the hardware acknowledges the
1206 	 * interrupt.
1207 	 *
1208 	 * This means we don't really have a good indication of whether or
1209 	 * which controlq triggered this interrupt. We'll just notify the
1210 	 * admin task that it should check all the controlqs.
1211 	 */
1212 	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
1213 
1214 	if (oicr & PFINT_OICR_VFLR_M) {
1215 		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
1216 	}
1217 
1218 	if (oicr & PFINT_OICR_MAL_DETECT_M) {
1219 		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
1220 	}
1221 
1222 	if (oicr & PFINT_OICR_GRST_M) {
1223 		u32 reset;
1224 
1225 		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
1226 			GLGEN_RSTAT_RESET_TYPE_S;
1227 
1228 		if (reset == ICE_RESET_CORER)
1229 			sc->soft_stats.corer_count++;
1230 		else if (reset == ICE_RESET_GLOBR)
1231 			sc->soft_stats.globr_count++;
1232 		else
1233 			sc->soft_stats.empr_count++;
1234 
1235 		/* There are a couple of bits at play for handling resets.
1236 		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
1237 		 * indicate that the driver has received an OICR with a reset
1238 		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
1239 		 * happen. Second, we set hw->reset_ongoing to indicate that
1240 		 * the hardware is in reset. We will set this back to false as
1241 		 * soon as the driver has determined that the hardware is out
1242 		 * of reset.
1243 		 *
1244 		 * If the driver wishes to trigger a reqest, it can set one of
1245 		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
1246 		 * correct type of reset.
1247 		 */
1248 		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
1249 			hw->reset_ongoing = true;
1250 	}
1251 
1252 	if (oicr & PFINT_OICR_ECC_ERR_M) {
1253 		device_printf(dev, "ECC Error detected!\n");
1254 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1255 	}
1256 
1257 	if (oicr & PFINT_OICR_PE_CRITERR_M) {
1258 		device_printf(dev, "Critical Protocol Engine Error detected!\n");
1259 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1260 	}
1261 
1262 	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
1263 		device_printf(dev, "PCI Exception detected!\n");
1264 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1265 	}
1266 
1267 	if (oicr & PFINT_OICR_HMC_ERR_M) {
1268 		/* Log the HMC errors, but don't disable the interrupt cause */
1269 		ice_log_hmc_error(hw, dev);
1270 	}
1271 
1272 	return (FILTER_SCHEDULE_THREAD);
1273 }
1274 
1275 /**
1276  * ice_allocate_msix - Allocate MSI-X vectors for the interface
1277  * @sc: the device private softc
1278  *
1279  * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
1280  *
1281  * First, determine a suitable total number of vectors based on the number
1282  * of CPUs, RSS buckets, the administrative vector, and other demands such as
1283  * RDMA.
1284  *
1285  * Request the desired amount of vectors, and see how many we obtain. If we
1286  * don't obtain as many as desired, reduce the demands by lowering the number
1287  * of requested queues or reducing the demand from other features such as
1288  * RDMA.
1289  *
1290  * @remark This function is required because the driver sets the
1291  * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
1292  * manually.
1293  *
1294  * @remark This driver will only use MSI-X vectors. If this is not possible,
1295  * neither MSI or legacy interrupts will be tried.
1296  *
1297  * @post on success this function must set the following scctx parameters:
1298  * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
1299  *
1300  * @returns zero on success or an error code on failure.
1301  */
1302 static int
1303 ice_allocate_msix(struct ice_softc *sc)
1304 {
1305 	bool iflib_override_queue_count = false;
1306 	if_softc_ctx_t scctx = sc->scctx;
1307 	device_t dev = sc->dev;
1308 	cpuset_t cpus;
1309 	int bar, queues, vectors, requested;
1310 	int err = 0;
1311 
1312 	/* Allocate the MSI-X bar */
1313 	bar = scctx->isc_msix_bar;
1314 	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
1315 	if (!sc->msix_table) {
1316 		device_printf(dev, "Unable to map MSI-X table\n");
1317 		return (ENOMEM);
1318 	}
1319 
1320 	/* Check if the iflib queue count sysctls have been set */
1321 	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
1322 		iflib_override_queue_count = true;
1323 
1324 	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
1325 	if (err) {
1326 		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
1327 			      __func__, ice_err_str(err));
1328 		CPU_COPY(&all_cpus, &cpus);
1329 	}
1330 
1331 	/* Attempt to mimic behavior of iflib_msix_init */
1332 	if (iflib_override_queue_count) {
1333 		/*
1334 		 * If the override sysctls have been set, limit the queues to
1335 		 * the number of logical CPUs.
1336 		 */
1337 		queues = mp_ncpus;
1338 	} else {
1339 		/*
1340 		 * Otherwise, limit the queue count to the CPUs associated
1341 		 * with the NUMA node the device is associated with.
1342 		 */
1343 		queues = CPU_COUNT(&cpus);
1344 	}
1345 
1346 	/* Clamp to the number of RSS buckets */
1347 	queues = imin(queues, rss_getnumbuckets());
1348 
1349 	/*
1350 	 * Clamp the number of queue pairs to the minimum of the requested Tx
1351 	 * and Rx queues.
1352 	 */
1353 	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
1354 	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
1355 
1356 	/*
1357 	 * Determine the number of vectors to request. Note that we also need
1358 	 * to allocate one vector for administrative tasks.
1359 	 */
1360 	requested = queues + 1;
1361 
1362 	vectors = requested;
1363 
1364 	err = pci_alloc_msix(dev, &vectors);
1365 	if (err) {
1366 		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
1367 			      vectors, ice_err_str(err));
1368 		goto err_free_msix_table;
1369 	}
1370 
1371 	/* If we don't receive enough vectors, reduce demands */
1372 	if (vectors < requested) {
1373 		int diff = requested - vectors;
1374 
1375 		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
1376 			      requested, vectors);
1377 
1378 		/*
1379 		 * If we still have a difference, we need to reduce the number
1380 		 * of queue pairs.
1381 		 *
1382 		 * However, we still need at least one vector for the admin
1383 		 * interrupt and one queue pair.
1384 		 */
1385 		if (queues <= diff) {
1386 			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
1387 			err = (ERANGE);
1388 			goto err_pci_release_msi;
1389 		}
1390 
1391 		queues -= diff;
1392 	}
1393 
1394 	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
1395 	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
1396 		      vectors);
1397 
1398 	scctx->isc_vectors = vectors;
1399 	scctx->isc_nrxqsets = queues;
1400 	scctx->isc_ntxqsets = queues;
1401 	scctx->isc_intr = IFLIB_INTR_MSIX;
1402 
1403 	/* Interrupt allocation tracking isn't required in recovery mode,
1404 	 * since neither RDMA nor VFs are enabled.
1405 	 */
1406 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1407 		return (0);
1408 
1409 	/* Keep track of which interrupt indices are being used for what */
1410 	sc->lan_vectors = vectors;
1411 	err = ice_resmgr_assign_contiguous(&sc->imgr, sc->pf_imap, sc->lan_vectors);
1412 	if (err) {
1413 		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
1414 			      ice_err_str(err));
1415 		goto err_pci_release_msi;
1416 	}
1417 
1418 	return (0);
1419 
1420 err_pci_release_msi:
1421 	pci_release_msi(dev);
1422 err_free_msix_table:
1423 	if (sc->msix_table != NULL) {
1424 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
1425 				rman_get_rid(sc->msix_table),
1426 				sc->msix_table);
1427 		sc->msix_table = NULL;
1428 	}
1429 
1430 	return (err);
1431 }
1432 
1433 /**
1434  * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
1435  * @ctx: the iflib context structure
1436  * @msix: the number of vectors we were assigned
1437  *
1438  * Called by iflib to assign MSI-X vectors to queues. Currently requires that
1439  * we get at least the same number of vectors as we have queues, and that we
1440  * always have the same number of Tx and Rx queues.
1441  *
1442  * Tx queues use a softirq instead of using their own hardware interrupt.
1443  */
1444 static int
1445 ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
1446 {
1447 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1448 	struct ice_vsi *vsi = &sc->pf_vsi;
1449 	int err, i, vector;
1450 
1451 	ASSERT_CTX_LOCKED(sc);
1452 
1453 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
1454 		device_printf(sc->dev,
1455 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
1456 			      vsi->num_tx_queues, vsi->num_rx_queues);
1457 		return (EOPNOTSUPP);
1458 	}
1459 
1460 	if (msix < (vsi->num_rx_queues + 1)) {
1461 		device_printf(sc->dev,
1462 			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
1463 		return (EOPNOTSUPP);
1464 	}
1465 
1466 	/* Save the number of vectors for future use */
1467 	sc->num_irq_vectors = vsi->num_rx_queues + 1;
1468 
1469 	/* Allocate space to store the IRQ vector data */
1470 	if (!(sc->irqvs =
1471 	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
1472 					       M_ICE, M_NOWAIT))) {
1473 		device_printf(sc->dev,
1474 			      "Unable to allocate irqv memory\n");
1475 		return (ENOMEM);
1476 	}
1477 
1478 	/* Administrative interrupt events will use vector 0 */
1479 	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
1480 				      ice_msix_admin, sc, 0, "admin");
1481 	if (err) {
1482 		device_printf(sc->dev,
1483 			      "Failed to register Admin queue handler: %s\n",
1484 			      ice_err_str(err));
1485 		goto free_irqvs;
1486 	}
1487 	sc->irqvs[0].me = 0;
1488 
1489 	/* Do not allocate queue interrupts when in recovery mode */
1490 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1491 		return (0);
1492 
1493 	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
1494 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1495 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1496 		int rid = vector + 1;
1497 		char irq_name[16];
1498 
1499 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
1500 		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
1501 					      IFLIB_INTR_RXTX, ice_msix_que,
1502 					      rxq, rxq->me, irq_name);
1503 		if (err) {
1504 			device_printf(sc->dev,
1505 				      "Failed to allocate q int %d err: %s\n",
1506 				      i, ice_err_str(err));
1507 			vector--;
1508 			i--;
1509 			goto fail;
1510 		}
1511 		sc->irqvs[vector].me = vector;
1512 		rxq->irqv = &sc->irqvs[vector];
1513 
1514 		bzero(irq_name, sizeof(irq_name));
1515 
1516 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
1517 		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
1518 					    IFLIB_INTR_TX, txq,
1519 					    txq->me, irq_name);
1520 		txq->irqv = &sc->irqvs[vector];
1521 	}
1522 
1523 	return (0);
1524 fail:
1525 	for (; i >= 0; i--, vector--)
1526 		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
1527 	iflib_irq_free(ctx, &sc->irqvs[0].irq);
1528 free_irqvs:
1529 	free(sc->irqvs, M_ICE);
1530 	sc->irqvs = NULL;
1531 	return err;
1532 }
1533 
1534 /**
1535  * ice_if_mtu_set - Set the device MTU
1536  * @ctx: iflib context structure
1537  * @mtu: the MTU requested
1538  *
1539  * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
1540  *
1541  * @pre assumes the caller holds the iflib CTX lock
1542  */
1543 static int
1544 ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
1545 {
1546 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1547 
1548 	ASSERT_CTX_LOCKED(sc);
1549 
1550 	/* Do not support configuration when in recovery mode */
1551 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1552 		return (ENOSYS);
1553 
1554 	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
1555 		return (EINVAL);
1556 
1557 	sc->scctx->isc_max_frame_size = mtu +
1558 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1559 
1560 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
1561 
1562 	return (0);
1563 }
1564 
1565 /**
1566  * ice_if_intr_enable - Enable device interrupts
1567  * @ctx: iflib context structure
1568  *
1569  * Called by iflib to request enabling device interrupts.
1570  */
1571 static void
1572 ice_if_intr_enable(if_ctx_t ctx)
1573 {
1574 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1575 	struct ice_vsi *vsi = &sc->pf_vsi;
1576 	struct ice_hw *hw = &sc->hw;
1577 
1578 	ASSERT_CTX_LOCKED(sc);
1579 
1580 	/* Enable ITR 0 */
1581 	ice_enable_intr(hw, sc->irqvs[0].me);
1582 
1583 	/* Do not enable queue interrupts in recovery mode */
1584 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1585 		return;
1586 
1587 	/* Enable all queue interrupts */
1588 	for (int i = 0; i < vsi->num_rx_queues; i++)
1589 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
1590 }
1591 
1592 /**
1593  * ice_if_intr_disable - Disable device interrupts
1594  * @ctx: iflib context structure
1595  *
1596  * Called by iflib to request disabling device interrupts.
1597  */
1598 static void
1599 ice_if_intr_disable(if_ctx_t ctx)
1600 {
1601 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1602 	struct ice_hw *hw = &sc->hw;
1603 	unsigned int i;
1604 
1605 	ASSERT_CTX_LOCKED(sc);
1606 
1607 	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
1608 	 * assigned to queues. Instead of assuming that the interrupt
1609 	 * assignment in the rx_queues structure is valid, just disable all
1610 	 * possible interrupts
1611 	 *
1612 	 * Note that we choose not to disable ITR 0 because this handles the
1613 	 * AdminQ interrupts, and we want to keep processing these even when
1614 	 * the interface is offline.
1615 	 */
1616 	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1617 		ice_disable_intr(hw, i);
1618 }
1619 
1620 /**
1621  * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1622  * @ctx: iflib context structure
1623  * @rxqid: the Rx queue to enable
1624  *
1625  * Enable a specific Rx queue interrupt.
1626  *
1627  * This function is not protected by the iflib CTX lock.
1628  */
1629 static int
1630 ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1631 {
1632 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1633 	struct ice_vsi *vsi = &sc->pf_vsi;
1634 	struct ice_hw *hw = &sc->hw;
1635 
1636 	/* Do not enable queue interrupts in recovery mode */
1637 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1638 		return (ENOSYS);
1639 
1640 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1641 	return (0);
1642 }
1643 
1644 /**
1645  * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1646  * @ctx: iflib context structure
1647  * @txqid: the Tx queue to enable
1648  *
1649  * Enable a specific Tx queue interrupt.
1650  *
1651  * This function is not protected by the iflib CTX lock.
1652  */
1653 static int
1654 ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1655 {
1656 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1657 	struct ice_vsi *vsi = &sc->pf_vsi;
1658 	struct ice_hw *hw = &sc->hw;
1659 
1660 	/* Do not enable queue interrupts in recovery mode */
1661 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1662 		return (ENOSYS);
1663 
1664 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1665 	return (0);
1666 }
1667 
1668 /**
1669  * ice_if_promisc_set - Set device promiscuous mode
1670  * @ctx: iflib context structure
1671  * @flags: promiscuous flags to configure
1672  *
1673  * Called by iflib to configure device promiscuous mode.
1674  *
1675  * @remark Calls to this function will always overwrite the previous setting
1676  */
1677 static int
1678 ice_if_promisc_set(if_ctx_t ctx, int flags)
1679 {
1680 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1681 	struct ice_hw *hw = &sc->hw;
1682 	device_t dev = sc->dev;
1683 	enum ice_status status;
1684 	bool promisc_enable = flags & IFF_PROMISC;
1685 	bool multi_enable = flags & IFF_ALLMULTI;
1686 
1687 	/* Do not support configuration when in recovery mode */
1688 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1689 		return (ENOSYS);
1690 
1691 	if (multi_enable)
1692 		return (EOPNOTSUPP);
1693 
1694 	if (promisc_enable) {
1695 		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1696 					     ICE_VSI_PROMISC_MASK, 0);
1697 		if (status && status != ICE_ERR_ALREADY_EXISTS) {
1698 			device_printf(dev,
1699 				      "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n",
1700 				      ice_status_str(status),
1701 				      ice_aq_str(hw->adminq.sq_last_status));
1702 			return (EIO);
1703 		}
1704 	} else {
1705 		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
1706 					       ICE_VSI_PROMISC_MASK, 0);
1707 		if (status) {
1708 			device_printf(dev,
1709 				      "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n",
1710 				      ice_status_str(status),
1711 				      ice_aq_str(hw->adminq.sq_last_status));
1712 			return (EIO);
1713 		}
1714 	}
1715 
1716 	return (0);
1717 }
1718 
1719 /**
1720  * ice_if_media_change - Change device media
1721  * @ctx: device ctx structure
1722  *
1723  * Called by iflib when a media change is requested. This operation is not
1724  * supported by the hardware, so we just return an error code.
1725  */
1726 static int
1727 ice_if_media_change(if_ctx_t ctx)
1728 {
1729 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1730 
1731 	device_printf(sc->dev, "Media change is not supported.\n");
1732 	return (ENODEV);
1733 }
1734 
1735 /**
1736  * ice_if_media_status - Report current device media
1737  * @ctx: iflib context structure
1738  * @ifmr: ifmedia request structure to update
1739  *
1740  * Updates the provided ifmr with current device media status, including link
1741  * status and media type.
1742  */
1743 static void
1744 ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1745 {
1746 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1747 	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;
1748 
1749 	ifmr->ifm_status = IFM_AVALID;
1750 	ifmr->ifm_active = IFM_ETHER;
1751 
1752 	/* Never report link up or media types when in recovery mode */
1753 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1754 		return;
1755 
1756 	if (!sc->link_up)
1757 		return;
1758 
1759 	ifmr->ifm_status |= IFM_ACTIVE;
1760 	ifmr->ifm_active |= IFM_FDX;
1761 
1762 	if (li->phy_type_low)
1763 		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
1764 	else if (li->phy_type_high)
1765 		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
1766 	else
1767 		ifmr->ifm_active |= IFM_UNKNOWN;
1768 
1769 	/* Report flow control status as well */
1770 	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
1771 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1772 	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
1773 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1774 }
1775 
1776 /**
1777  * ice_init_tx_tracking - Initialize Tx queue software tracking values
1778  * @vsi: the VSI to initialize
1779  *
1780  * Initialize Tx queue software tracking values, including the Report Status
1781  * queue, and related software tracking values.
1782  */
1783 static void
1784 ice_init_tx_tracking(struct ice_vsi *vsi)
1785 {
1786 	struct ice_tx_queue *txq;
1787 	size_t j;
1788 	int i;
1789 
1790 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1791 
1792 		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;
1793 
1794 		/* Initialize the last processed descriptor to be the end of
1795 		 * the ring, rather than the start, so that we avoid an
1796 		 * off-by-one error in ice_ift_txd_credits_update for the
1797 		 * first packet.
1798 		 */
1799 		txq->tx_cidx_processed = txq->desc_count - 1;
1800 
1801 		for (j = 0; j < txq->desc_count; j++)
1802 			txq->tx_rsq[j] = QIDX_INVALID;
1803 	}
1804 }
1805 
1806 /**
1807  * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
1808  * @sc: the device softc
1809  *
1810  * Called to update the Rx queue mbuf_sz parameter for configuring the receive
1811  * buffer sizes when programming hardware.
1812  */
1813 static void
1814 ice_update_rx_mbuf_sz(struct ice_softc *sc)
1815 {
1816 	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
1817 	struct ice_vsi *vsi = &sc->pf_vsi;
1818 
1819 	MPASS(mbuf_sz <= UINT16_MAX);
1820 	vsi->mbuf_sz = mbuf_sz;
1821 }
1822 
1823 /**
1824  * ice_if_init - Initialize the device
1825  * @ctx: iflib ctx structure
1826  *
1827  * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
1828  * device filters and prepares the Tx and Rx engines.
1829  *
1830  * @pre assumes the caller holds the iflib CTX lock
1831  */
1832 static void
1833 ice_if_init(if_ctx_t ctx)
1834 {
1835 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1836 	device_t dev = sc->dev;
1837 	int err;
1838 
1839 	ASSERT_CTX_LOCKED(sc);
1840 
1841 	/*
1842 	 * We've seen an issue with 11.3/12.1 where sideband routines are
1843 	 * called after detach is called.  This would call routines after
1844 	 * if_stop, causing issues with the teardown process.  This has
1845 	 * seemingly been fixed in STABLE snapshots, but it seems like a
1846 	 * good idea to have this guard here regardless.
1847 	 */
1848 	if (ice_driver_is_detaching(sc))
1849 		return;
1850 
1851 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1852 		return;
1853 
1854 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
1855 		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
1856 		return;
1857 	}
1858 
1859 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
1860 		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
1861 		return;
1862 	}
1863 
1864 	ice_update_rx_mbuf_sz(sc);
1865 
1866 	/* Update the MAC address... User might use a LAA */
1867 	err = ice_update_laa_mac(sc);
1868 	if (err) {
1869 		device_printf(dev,
1870 			      "LAA address change failed, err %s\n",
1871 			      ice_err_str(err));
1872 		return;
1873 	}
1874 
1875 	/* Initialize software Tx tracking values */
1876 	ice_init_tx_tracking(&sc->pf_vsi);
1877 
1878 	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
1879 	if (err) {
1880 		device_printf(dev,
1881 			      "Unable to configure the main VSI for Tx: %s\n",
1882 			      ice_err_str(err));
1883 		return;
1884 	}
1885 
1886 	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
1887 	if (err) {
1888 		device_printf(dev,
1889 			      "Unable to configure the main VSI for Rx: %s\n",
1890 			      ice_err_str(err));
1891 		goto err_cleanup_tx;
1892 	}
1893 
1894 	err = ice_control_rx_queues(&sc->pf_vsi, true);
1895 	if (err) {
1896 		device_printf(dev,
1897 			      "Unable to enable Rx rings for transmit: %s\n",
1898 			      ice_err_str(err));
1899 		goto err_cleanup_tx;
1900 	}
1901 
1902 	err = ice_cfg_pf_default_mac_filters(sc);
1903 	if (err) {
1904 		device_printf(dev,
1905 			      "Unable to configure default MAC filters: %s\n",
1906 			      ice_err_str(err));
1907 		goto err_stop_rx;
1908 	}
1909 
1910 	/* We use software interrupts for Tx, so we only program the hardware
1911 	 * interrupts for Rx.
1912 	 */
1913 	ice_configure_rxq_interrupts(&sc->pf_vsi);
1914 	ice_configure_rx_itr(&sc->pf_vsi);
1915 
1916 	/* Configure promiscuous mode */
1917 	ice_if_promisc_set(ctx, if_getflags(sc->ifp));
1918 
1919 	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
1920 	return;
1921 
1922 err_stop_rx:
1923 	ice_control_rx_queues(&sc->pf_vsi, false);
1924 err_cleanup_tx:
1925 	ice_vsi_disable_tx(&sc->pf_vsi);
1926 }
1927 
1928 /**
1929  * ice_poll_for_media_avail - Re-enable link if media is detected
1930  * @sc: device private structure
1931  *
1932  * Intended to be called from the driver's timer function, this function
1933  * sends the Get Link Status AQ command and re-enables HW link if the
1934  * command says that media is available.
1935  *
1936  * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing,
1937  * since media removal events are supposed to be sent to the driver through
1938  * a link status event.
1939  */
1940 static void
1941 ice_poll_for_media_avail(struct ice_softc *sc)
1942 {
1943 	struct ice_hw *hw = &sc->hw;
1944 	struct ice_port_info *pi = hw->port_info;
1945 
1946 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
1947 		pi->phy.get_link_info = true;
1948 		ice_get_link_status(pi, &sc->link_up);
1949 
1950 		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
1951 			enum ice_status status;
1952 
1953 			/* Re-enable link and re-apply user link settings */
1954 			ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
1955 
1956 			/* Update the OS about changes in media capability */
1957 			status = ice_add_media_types(sc, sc->media);
1958 			if (status)
1959 				device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
1960 					      ice_status_str(status),
1961 					      ice_aq_str(hw->adminq.sq_last_status));
1962 
1963 			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
1964 		}
1965 	}
1966 }
1967 
1968 /**
1969  * ice_if_timer - called by iflib periodically
1970  * @ctx: iflib ctx structure
1971  * @qid: the queue this timer was called for
1972  *
1973  * This callback is triggered by iflib periodically. We use it to update the
1974  * hw statistics.
1975  *
1976  * @remark this function is not protected by the iflib CTX lock.
1977  */
1978 static void
1979 ice_if_timer(if_ctx_t ctx, uint16_t qid)
1980 {
1981 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1982 	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;
1983 
1984 	if (qid != 0)
1985 		return;
1986 
1987 	/* Do not attempt to update stats when in recovery mode */
1988 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1989 		return;
1990 
1991 	/* Update device statistics */
1992 	ice_update_pf_stats(sc);
1993 
1994 	/*
1995 	 * For proper watchdog management, the iflib stack needs to know if
1996 	 * we've been paused during the last interval. Check if the
1997 	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
1998 	 */
1999 	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
2000 		sc->scctx->isc_pause_frames = 1;
2001 
2002 	/* Update the primary VSI stats */
2003 	ice_update_vsi_hw_stats(&sc->pf_vsi);
2004 }
2005 
2006 /**
2007  * ice_admin_timer - called periodically to trigger the admin task
2008  * @arg: callout(9) argument pointing to the device private softc structure
2009  *
2010  * Timer function used as part of a callout(9) timer that will periodically
2011  * trigger the admin task, even when the interface is down.
2012  *
2013  * @remark this function is not called by iflib and is not protected by the
2014  * iflib CTX lock.
2015  *
2016  * @remark because this is a callout function, it cannot sleep and should not
2017  * attempt taking the iflib CTX lock.
2018  */
2019 static void
2020 ice_admin_timer(void *arg)
2021 {
2022 	struct ice_softc *sc = (struct ice_softc *)arg;
2023 
2024 	/*
2025 	 * There is a point where callout routines are no longer
2026 	 * cancelable.  So there exists a window of time where the
2027 	 * driver enters detach() and tries to cancel the callout, but the
2028 	 * callout routine has passed the cancellation point.  The detach()
2029 	 * routine is unaware of this and tries to free resources that the
2030 	 * callout routine needs.  So we check for the detach state flag to
2031 	 * at least shrink the window of opportunity.
2032 	 */
2033 	if (ice_driver_is_detaching(sc))
2034 		return;
2035 
2036 	/* Fire off the admin task */
2037 	iflib_admin_intr_deferred(sc->ctx);
2038 
2039 	/* Reschedule the admin timer */
2040 	callout_schedule(&sc->admin_timer, hz/2);
2041 }
2042 
2043 /**
2044  * ice_transition_recovery_mode - Transition to recovery mode
2045  * @sc: the device private softc
2046  *
2047  * Called when the driver detects that the firmware has entered recovery mode
2048  * at run time.
2049  */
2050 static void
2051 ice_transition_recovery_mode(struct ice_softc *sc)
2052 {
2053 	struct ice_vsi *vsi = &sc->pf_vsi;
2054 	int i;
2055 
2056 	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
2057 
2058 	/* Tell the stack that the link has gone down */
2059 	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
2060 
2061 	/* Request that the device be re-initialized */
2062 	ice_request_stack_reinit(sc);
2063 
2064 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2065 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2066 
2067 	ice_vsi_del_txqs_ctx(vsi);
2068 	ice_vsi_del_rxqs_ctx(vsi);
2069 
2070 	for (i = 0; i < sc->num_available_vsi; i++) {
2071 		if (sc->all_vsi[i])
2072 			ice_release_vsi(sc->all_vsi[i]);
2073 	}
2074 	sc->num_available_vsi = 0;
2075 
2076 	if (sc->all_vsi) {
2077 		free(sc->all_vsi, M_ICE);
2078 		sc->all_vsi = NULL;
2079 	}
2080 
2081 	/* Destroy the interrupt manager */
2082 	ice_resmgr_destroy(&sc->imgr);
2083 	/* Destroy the queue managers */
2084 	ice_resmgr_destroy(&sc->tx_qmgr);
2085 	ice_resmgr_destroy(&sc->rx_qmgr);
2086 
2087 	ice_deinit_hw(&sc->hw);
2088 }
2089 
2090 /**
2091  * ice_transition_safe_mode - Transition to safe mode
2092  * @sc: the device private softc
2093  *
2094  * Called when the driver attempts to reload the DDP package during a device
2095  * reset, and the new download fails. If so, we must transition to safe mode
2096  * at run time.
2097  *
2098  * @remark although safe mode normally allocates only a single queue, we can't
2099  * change the number of queues dynamically when using iflib. Due to this, we
2100  * do not attempt to reduce the number of queues.
2101  */
2102 static void
2103 ice_transition_safe_mode(struct ice_softc *sc)
2104 {
2105 	/* Indicate that we are in Safe mode */
2106 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
2107 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
2108 
2109 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2110 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2111 
2112 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2113 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
2114 }
2115 
2116 /**
2117  * ice_if_update_admin_status - update admin status
2118  * @ctx: iflib ctx structure
2119  *
2120  * Called by iflib to update the admin status. For our purposes, this means
2121  * check the adminq, and update the link status. It's ultimately triggered by
2122  * our admin interrupt, or by the ice_if_timer periodically.
2123  *
2124  * @pre assumes the caller holds the iflib CTX lock
2125  */
2126 static void
2127 ice_if_update_admin_status(if_ctx_t ctx)
2128 {
2129 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2130 	enum ice_fw_modes fw_mode;
2131 	bool reschedule = false;
2132 	u16 pending = 0;
2133 
2134 	ASSERT_CTX_LOCKED(sc);
2135 
2136 	/* Check if the firmware entered recovery mode at run time */
2137 	fw_mode = ice_get_fw_mode(&sc->hw);
2138 	if (fw_mode == ICE_FW_MODE_REC) {
2139 		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2140 			/* If we just entered recovery mode, log a warning to
2141 			 * the system administrator and deinit driver state
2142 			 * that is no longer functional.
2143 			 */
2144 			ice_transition_recovery_mode(sc);
2145 		}
2146 	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
2147 		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
2148 			/* Rollback mode isn't fatal, but we don't want to
2149 			 * repeatedly post a message about it.
2150 			 */
2151 			ice_print_rollback_msg(&sc->hw);
2152 		}
2153 	}
2154 
2155 	/* Handle global reset events */
2156 	ice_handle_reset_event(sc);
2157 
2158 	/* Handle PF reset requests */
2159 	ice_handle_pf_reset_request(sc);
2160 
2161 	/* Handle MDD events */
2162 	ice_handle_mdd_event(sc);
2163 
2164 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
2165 	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
2166 	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2167 		/*
2168 		 * If we know the control queues are disabled, skip processing
2169 		 * the control queues entirely.
2170 		 */
2171 		;
2172 	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
2173 		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
2174 		if (pending > 0)
2175 			reschedule = true;
2176 
2177 		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
2178 		if (pending > 0)
2179 			reschedule = true;
2180 	}
2181 
2182 	/* Poll for link up */
2183 	ice_poll_for_media_avail(sc);
2184 
2185 	/* Check and update link status */
2186 	ice_update_link_status(sc, false);
2187 
2188 	/*
2189 	 * If there are still messages to process, we need to reschedule
2190 	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
2191 	 * woken up at the next interrupt or timer event.
2192 	 */
2193 	if (reschedule) {
2194 		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
2195 		iflib_admin_intr_deferred(ctx);
2196 	} else {
2197 		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2198 	}
2199 }
2200 
2201 /**
2202  * ice_prepare_for_reset - Prepare device for an impending reset
2203  * @sc: The device private softc
2204  *
2205  * Prepare the driver for an impending reset, shutting down VSIs, clearing the
2206  * scheduler setup, and shutting down controlqs. Uses the
2207  * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
2208  * driver for reset or not.
2209  */
2210 static void
2211 ice_prepare_for_reset(struct ice_softc *sc)
2212 {
2213 	struct ice_hw *hw = &sc->hw;
2214 
2215 	/* If we're already prepared, there's nothing to do */
2216 	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
2217 		return;
2218 
2219 	log(LOG_INFO, "%s: preparing to reset device logic\n", sc->ifp->if_xname);
2220 
2221 	/* In recovery mode, hardware is not initialized */
2222 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2223 		return;
2224 
2225 	/* Release the main PF VSI queue mappings */
2226 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2227 				    sc->pf_vsi.num_tx_queues);
2228 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2229 				    sc->pf_vsi.num_rx_queues);
2230 
2231 	ice_clear_hw_tbls(hw);
2232 
2233 	if (hw->port_info)
2234 		ice_sched_clear_port(hw->port_info);
2235 
2236 	ice_shutdown_all_ctrlq(hw);
2237 }
2238 
2239 /**
2240  * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
2241  * @sc: the device softc pointer
2242  *
2243  * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
2244  * mapping after a reset occurred.
2245  */
2246 static int
2247 ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
2248 {
2249 	struct ice_vsi *vsi = &sc->pf_vsi;
2250 	struct ice_tx_queue *txq;
2251 	struct ice_rx_queue *rxq;
2252 	int err, i;
2253 
2254 	/* Re-assign Tx queues from PF space to the main VSI */
2255 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
2256 					    vsi->num_tx_queues);
2257 	if (err) {
2258 		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
2259 			      ice_err_str(err));
2260 		return (err);
2261 	}
2262 
2263 	/* Re-assign Rx queues from PF space to this VSI */
2264 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
2265 					    vsi->num_rx_queues);
2266 	if (err) {
2267 		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
2268 			      ice_err_str(err));
2269 		goto err_release_tx_queues;
2270 	}
2271 
2272 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
2273 
2274 	/* Re-assign Tx queue tail pointers */
2275 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
2276 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
2277 
2278 	/* Re-assign Rx queue tail pointers */
2279 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
2280 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
2281 
2282 	return (0);
2283 
2284 err_release_tx_queues:
2285 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2286 				   sc->pf_vsi.num_tx_queues);
2287 
2288 	return (err);
2289 }
2290 
/*
 * Determine if the iflib context is active: evaluates to non-zero when the
 * ifnet belonging to the context has IFF_DRV_RUNNING set in its driver flags.
 */
#define CTX_ACTIVE(ctx) \
	(if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
2293 
2294 /**
2295  * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
2296  * @sc: The device private softc
2297  *
2298  * Handle a driver rebuild while in recovery mode. This will only rebuild the
2299  * limited functionality supported while in recovery mode.
2300  */
2301 static void
2302 ice_rebuild_recovery_mode(struct ice_softc *sc)
2303 {
2304 	device_t dev = sc->dev;
2305 
2306 	/* enable PCIe bus master */
2307 	pci_enable_busmaster(dev);
2308 
2309 	/* Configure interrupt causes for the administrative interrupt */
2310 	ice_configure_misc_interrupts(sc);
2311 
2312 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2313 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2314 
2315 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2316 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2317 
2318 	log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname);
2319 
2320 	/* In order to completely restore device functionality, the iflib core
2321 	 * needs to be reset. We need to request an iflib reset. Additionally,
2322 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2323 	 * the iflib core, we also want re-run the admin task so that iflib
2324 	 * resets immediately instead of waiting for the next interrupt.
2325 	 */
2326 	ice_request_stack_reinit(sc);
2327 
2328 	return;
2329 }
2330 
2331 /**
2332  * ice_rebuild - Rebuild driver state post reset
2333  * @sc: The device private softc
2334  *
2335  * Restore driver state after a reset occurred. Restart the controlqs, setup
2336  * the hardware port, and re-enable the VSIs.
2337  */
2338 static void
2339 ice_rebuild(struct ice_softc *sc)
2340 {
2341 	struct ice_hw *hw = &sc->hw;
2342 	device_t dev = sc->dev;
2343 	enum ice_status status;
2344 	int err;
2345 
2346 	sc->rebuild_ticks = ticks;
2347 
2348 	/* If we're rebuilding, then a reset has succeeded. */
2349 	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);
2350 
2351 	/*
2352 	 * If the firmware is in recovery mode, only restore the limited
2353 	 * functionality supported by recovery mode.
2354 	 */
2355 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2356 		ice_rebuild_recovery_mode(sc);
2357 		return;
2358 	}
2359 
2360 	/* enable PCIe bus master */
2361 	pci_enable_busmaster(dev);
2362 
2363 	status = ice_init_all_ctrlq(hw);
2364 	if (status) {
2365 		device_printf(dev, "failed to re-init controlqs, err %s\n",
2366 			      ice_status_str(status));
2367 		goto err_shutdown_ctrlq;
2368 	}
2369 
2370 	/* Query the allocated resources for Tx scheduler */
2371 	status = ice_sched_query_res_alloc(hw);
2372 	if (status) {
2373 		device_printf(dev,
2374 			      "Failed to query scheduler resources, err %s aq_err %s\n",
2375 			      ice_status_str(status),
2376 			      ice_aq_str(hw->adminq.sq_last_status));
2377 		goto err_shutdown_ctrlq;
2378 	}
2379 
2380 	/* Re-enable FW logging. Keep going even if this fails */
2381 	status = ice_fwlog_set(hw, &hw->fwlog_cfg);
2382 	if (!status) {
2383 		/*
2384 		 * We should have the most updated cached copy of the
2385 		 * configuration, regardless of whether we're rebuilding
2386 		 * or not.  So we'll simply check to see if logging was
2387 		 * enabled pre-rebuild.
2388 		 */
2389 		if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2390 			status = ice_fwlog_register(hw);
2391 			if (status)
2392 				device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n",
2393 				   ice_status_str(status),
2394 				   ice_aq_str(hw->adminq.sq_last_status));
2395 		}
2396 	} else
2397 		device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n",
2398 		   ice_status_str(status),
2399 		   ice_aq_str(hw->adminq.sq_last_status));
2400 
2401 	err = ice_send_version(sc);
2402 	if (err)
2403 		goto err_shutdown_ctrlq;
2404 
2405 	err = ice_init_link_events(sc);
2406 	if (err) {
2407 		device_printf(dev, "ice_init_link_events failed: %s\n",
2408 			      ice_err_str(err));
2409 		goto err_shutdown_ctrlq;
2410 	}
2411 
2412 	status = ice_clear_pf_cfg(hw);
2413 	if (status) {
2414 		device_printf(dev, "failed to clear PF configuration, err %s\n",
2415 			      ice_status_str(status));
2416 		goto err_shutdown_ctrlq;
2417 	}
2418 
2419 	ice_clear_pxe_mode(hw);
2420 
2421 	status = ice_get_caps(hw);
2422 	if (status) {
2423 		device_printf(dev, "failed to get capabilities, err %s\n",
2424 			      ice_status_str(status));
2425 		goto err_shutdown_ctrlq;
2426 	}
2427 
2428 	status = ice_sched_init_port(hw->port_info);
2429 	if (status) {
2430 		device_printf(dev, "failed to initialize port, err %s\n",
2431 			      ice_status_str(status));
2432 		goto err_sched_cleanup;
2433 	}
2434 
2435 	/* If we previously loaded the package, it needs to be reloaded now */
2436 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
2437 		status = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
2438 		if (status) {
2439 			ice_log_pkg_init(sc, &status);
2440 
2441 			ice_transition_safe_mode(sc);
2442 		}
2443 	}
2444 
2445 	ice_reset_pf_stats(sc);
2446 
2447 	err = ice_rebuild_pf_vsi_qmap(sc);
2448 	if (err) {
2449 		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
2450 			      ice_err_str(err));
2451 		goto err_sched_cleanup;
2452 	}
2453 	err = ice_initialize_vsi(&sc->pf_vsi);
2454 	if (err) {
2455 		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
2456 			      ice_err_str(err));
2457 		goto err_release_queue_allocations;
2458 	}
2459 
2460 	/* Replay all VSI configuration */
2461 	err = ice_replay_all_vsi_cfg(sc);
2462 	if (err)
2463 		goto err_deinit_pf_vsi;
2464 
2465 	/* Re-enable FW health event reporting */
2466 	ice_init_health_events(sc);
2467 
2468 	/* Reconfigure the main PF VSI for RSS */
2469 	err = ice_config_rss(&sc->pf_vsi);
2470 	if (err) {
2471 		device_printf(sc->dev,
2472 			      "Unable to reconfigure RSS for the main VSI, err %s\n",
2473 			      ice_err_str(err));
2474 		goto err_deinit_pf_vsi;
2475 	}
2476 
2477 	/* Refresh link status */
2478 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2479 	sc->hw.port_info->phy.get_link_info = true;
2480 	ice_get_link_status(sc->hw.port_info, &sc->link_up);
2481 	ice_update_link_status(sc, true);
2482 
2483 	/* Configure interrupt causes for the administrative interrupt */
2484 	ice_configure_misc_interrupts(sc);
2485 
2486 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2487 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2488 
2489 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2490 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2491 
2492 	log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname);
2493 
2494 	/* In order to completely restore device functionality, the iflib core
2495 	 * needs to be reset. We need to request an iflib reset. Additionally,
2496 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2497 	 * the iflib core, we also want re-run the admin task so that iflib
2498 	 * resets immediately instead of waiting for the next interrupt.
2499 	 */
2500 	ice_request_stack_reinit(sc);
2501 
2502 	return;
2503 
2504 err_deinit_pf_vsi:
2505 	ice_deinit_vsi(&sc->pf_vsi);
2506 err_release_queue_allocations:
2507 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2508 				    sc->pf_vsi.num_tx_queues);
2509 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2510 				    sc->pf_vsi.num_rx_queues);
2511 err_sched_cleanup:
2512 	ice_sched_cleanup_all(hw);
2513 err_shutdown_ctrlq:
2514 	ice_shutdown_all_ctrlq(hw);
2515 	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2516 	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
2517 }
2518 
2519 /**
2520  * ice_handle_reset_event - Handle reset events triggered by OICR
2521  * @sc: The device private softc
2522  *
2523  * Handle reset events triggered by an OICR notification. This includes CORER,
2524  * GLOBR, and EMPR resets triggered by software on this or any other PF or by
2525  * firmware.
2526  *
2527  * @pre assumes the iflib context lock is held, and will unlock it while
2528  * waiting for the hardware to finish reset.
2529  */
2530 static void
2531 ice_handle_reset_event(struct ice_softc *sc)
2532 {
2533 	struct ice_hw *hw = &sc->hw;
2534 	enum ice_status status;
2535 	device_t dev = sc->dev;
2536 
2537 	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
2538 	 * trigger an OICR interrupt. Our OICR handler will determine when
2539 	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
2540 	 * appropriate.
2541 	 */
2542 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
2543 		return;
2544 
2545 	ice_prepare_for_reset(sc);
2546 
2547 	/*
2548 	 * Release the iflib context lock and wait for the device to finish
2549 	 * resetting.
2550 	 */
2551 	IFLIB_CTX_UNLOCK(sc);
2552 	status = ice_check_reset(hw);
2553 	IFLIB_CTX_LOCK(sc);
2554 	if (status) {
2555 		device_printf(dev, "Device never came out of reset, err %s\n",
2556 			      ice_status_str(status));
2557 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2558 		return;
2559 	}
2560 
2561 	/* We're done with the reset, so we can rebuild driver state */
2562 	sc->hw.reset_ongoing = false;
2563 	ice_rebuild(sc);
2564 
2565 	/* In the unlikely event that a PF reset request occurs at the same
2566 	 * time as a global reset, clear the request now. This avoids
2567 	 * resetting a second time right after we reset due to a global event.
2568 	 */
2569 	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2570 		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
2571 }
2572 
2573 /**
2574  * ice_handle_pf_reset_request - Initiate PF reset requested by software
2575  * @sc: The device private softc
2576  *
2577  * Initiate a PF reset requested by software. We handle this in the admin task
2578  * so that only one thread actually handles driver preparation and cleanup,
2579  * rather than having multiple threads possibly attempt to run this code
2580  * simultaneously.
2581  *
2582  * @pre assumes the iflib context lock is held and will unlock it while
2583  * waiting for the PF reset to complete.
2584  */
2585 static void
2586 ice_handle_pf_reset_request(struct ice_softc *sc)
2587 {
2588 	struct ice_hw *hw = &sc->hw;
2589 	enum ice_status status;
2590 
2591 	/* Check for PF reset requests */
2592 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2593 		return;
2594 
2595 	/* Make sure we're prepared for reset */
2596 	ice_prepare_for_reset(sc);
2597 
2598 	/*
2599 	 * Release the iflib context lock and wait for the device to finish
2600 	 * resetting.
2601 	 */
2602 	IFLIB_CTX_UNLOCK(sc);
2603 	status = ice_reset(hw, ICE_RESET_PFR);
2604 	IFLIB_CTX_LOCK(sc);
2605 	if (status) {
2606 		device_printf(sc->dev, "device PF reset failed, err %s\n",
2607 			      ice_status_str(status));
2608 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2609 		return;
2610 	}
2611 
2612 	sc->soft_stats.pfr_count++;
2613 	ice_rebuild(sc);
2614 }
2615 
2616 /**
2617  * ice_init_device_features - Init device driver features
2618  * @sc: driver softc structure
2619  *
2620  * @pre assumes that the function capabilities bits have been set up by
2621  * ice_init_hw().
2622  */
2623 static void
2624 ice_init_device_features(struct ice_softc *sc)
2625 {
2626 	/*
2627 	 * A failed pkg file download triggers safe mode, disabling advanced
2628 	 * device feature support
2629 	 */
2630 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE))
2631 		return;
2632 
2633 	/* Set capabilities that all devices support */
2634 	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2635 	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
2636 	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
2637 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
2638 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
2639 	ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2640 	ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2641 	ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap);
2642 
2643 	/* Disable features due to hardware limitations... */
2644 	if (!sc->hw.func_caps.common_cap.rss_table_size)
2645 		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2646 	/* Disable features due to firmware limitations... */
2647 	if (!ice_is_fw_health_report_supported(&sc->hw))
2648 		ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2649 	if (!ice_fwlog_supported(&sc->hw))
2650 		ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2651 	if (sc->hw.fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2652 		if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING))
2653 			ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en);
2654 		else
2655 			ice_fwlog_unregister(&sc->hw);
2656 	}
2657 
2658 	/* Disable capabilities not supported by the OS */
2659 	ice_disable_unsupported_features(sc->feat_cap);
2660 
2661 	/* RSS is always enabled for iflib */
2662 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
2663 		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
2664 }
2665 
2666 /**
2667  * ice_if_multi_set - Callback to update Multicast filters in HW
2668  * @ctx: iflib ctx structure
2669  *
2670  * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
2671  * the if_multiaddrs list and determine which filters have been added or
2672  * removed from the list, and update HW programming to reflect the new list.
2673  *
2674  * @pre assumes the caller holds the iflib CTX lock
2675  */
2676 static void
2677 ice_if_multi_set(if_ctx_t ctx)
2678 {
2679 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2680 	int err;
2681 
2682 	ASSERT_CTX_LOCKED(sc);
2683 
2684 	/* Do not handle multicast configuration in recovery mode */
2685 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2686 		return;
2687 
2688 	err = ice_sync_multicast_filters(sc);
2689 	if (err) {
2690 		device_printf(sc->dev,
2691 			      "Failed to synchronize multicast filter list: %s\n",
2692 			      ice_err_str(err));
2693 		return;
2694 	}
2695 }
2696 
2697 /**
2698  * ice_if_vlan_register - Register a VLAN with the hardware
2699  * @ctx: iflib ctx pointer
2700  * @vtag: VLAN to add
2701  *
2702  * Programs the main PF VSI with a hardware filter for the given VLAN.
2703  *
2704  * @pre assumes the caller holds the iflib CTX lock
2705  */
2706 static void
2707 ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
2708 {
2709 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2710 	enum ice_status status;
2711 
2712 	ASSERT_CTX_LOCKED(sc);
2713 
2714 	/* Do not handle VLAN configuration in recovery mode */
2715 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2716 		return;
2717 
2718 	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
2719 	if (status) {
2720 		device_printf(sc->dev,
2721 			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
2722 			      vtag, ice_status_str(status),
2723 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2724 	}
2725 }
2726 
2727 /**
2728  * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
2729  * @ctx: iflib ctx pointer
2730  * @vtag: VLAN to add
2731  *
2732  * Removes the previously programmed VLAN filter from the main PF VSI.
2733  *
2734  * @pre assumes the caller holds the iflib CTX lock
2735  */
2736 static void
2737 ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
2738 {
2739 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2740 	enum ice_status status;
2741 
2742 	ASSERT_CTX_LOCKED(sc);
2743 
2744 	/* Do not handle VLAN configuration in recovery mode */
2745 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2746 		return;
2747 
2748 	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
2749 	if (status) {
2750 		device_printf(sc->dev,
2751 			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
2752 			      vtag, ice_status_str(status),
2753 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2754 	}
2755 }
2756 
2757 /**
2758  * ice_if_stop - Stop the device
2759  * @ctx: iflib context structure
2760  *
2761  * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
2762  * down)
2763  *
2764  * @pre assumes the caller holds the iflib CTX lock
2765  */
2766 static void
2767 ice_if_stop(if_ctx_t ctx)
2768 {
2769 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2770 
2771 	ASSERT_CTX_LOCKED(sc);
2772 
2773 	/*
2774 	 * The iflib core may call IFDI_STOP prior to the first call to
2775 	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
2776 	 * don't have, and disable Tx queues which aren't yet configured.
2777 	 * Although it is likely these extra operations are harmless, they do
2778 	 * cause spurious warning messages to be displayed, which may confuse
2779 	 * users.
2780 	 *
2781 	 * To avoid these messages, we use a state bit indicating if we've
2782 	 * been initialized. It will be set when ice_if_init is called, and
2783 	 * cleared here in ice_if_stop.
2784 	 */
2785 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
2786 		return;
2787 
2788 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
2789 		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
2790 		return;
2791 	}
2792 
2793 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
2794 		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
2795 		return;
2796 	}
2797 
2798 	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
2799 	 * return of these functions because there's nothing we can really do
2800 	 * if they fail, and the functions already print error messages.
2801 	 * Just try to shut down as much as we can.
2802 	 */
2803 	ice_rm_pf_default_mac_filters(sc);
2804 
2805 	/* Dissociate the Tx and Rx queues from the interrupts */
2806 	ice_flush_txq_interrupts(&sc->pf_vsi);
2807 	ice_flush_rxq_interrupts(&sc->pf_vsi);
2808 
2809 	/* Disable the Tx and Rx queues */
2810 	ice_vsi_disable_tx(&sc->pf_vsi);
2811 	ice_control_rx_queues(&sc->pf_vsi, false);
2812 }
2813 
2814 /**
2815  * ice_if_get_counter - Get current value of an ifnet statistic
2816  * @ctx: iflib context pointer
2817  * @counter: ifnet counter to read
2818  *
2819  * Reads the current value of an ifnet counter for the device.
2820  *
2821  * This function is not protected by the iflib CTX lock.
2822  */
2823 static uint64_t
2824 ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
2825 {
2826 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2827 
2828 	/* Return the counter for the main PF VSI */
2829 	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
2830 }
2831 
2832 /**
2833  * ice_request_stack_reinit - Request that iflib re-initialize
2834  * @sc: the device private softc
2835  *
2836  * Request that the device be brought down and up, to re-initialize. For
2837  * example, this may be called when a device reset occurs, or when Tx and Rx
2838  * queues need to be re-initialized.
2839  *
2840  * This is required because the iflib state is outside the driver, and must be
2841  * re-initialized if we need to resart Tx and Rx queues.
2842  */
2843 void
2844 ice_request_stack_reinit(struct ice_softc *sc)
2845 {
2846 	if (CTX_ACTIVE(sc->ctx)) {
2847 		iflib_request_reset(sc->ctx);
2848 		iflib_admin_intr_deferred(sc->ctx);
2849 	}
2850 }
2851 
2852 /**
2853  * ice_driver_is_detaching - Check if the driver is detaching/unloading
2854  * @sc: device private softc
2855  *
2856  * Returns true if the driver is detaching, false otherwise.
2857  *
2858  * @remark on newer kernels, take advantage of iflib_in_detach in order to
2859  * report detachment correctly as early as possible.
2860  *
2861  * @remark this function is used by various code paths that want to avoid
2862  * running if the driver is about to be removed. This includes sysctls and
2863  * other driver access points. Note that it does not fully resolve
2864  * detach-based race conditions as it is possible for a thread to race with
2865  * iflib_in_detach.
2866  */
2867 bool
2868 ice_driver_is_detaching(struct ice_softc *sc)
2869 {
2870 	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
2871 		iflib_in_detach(sc->ctx));
2872 }
2873 
2874 /**
2875  * ice_if_priv_ioctl - Device private ioctl handler
2876  * @ctx: iflib context pointer
2877  * @command: The ioctl command issued
2878  * @data: ioctl specific data
2879  *
2880  * iflib callback for handling custom driver specific ioctls.
2881  *
2882  * @pre Assumes that the iflib context lock is held.
2883  */
2884 static int
2885 ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
2886 {
2887 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2888 	struct ifdrv *ifd;
2889 	device_t dev = sc->dev;
2890 
2891 	if (data == NULL)
2892 		return (EINVAL);
2893 
2894 	ASSERT_CTX_LOCKED(sc);
2895 
2896 	/* Make sure the command type is valid */
2897 	switch (command) {
2898 	case SIOCSDRVSPEC:
2899 	case SIOCGDRVSPEC:
2900 		/* Accepted commands */
2901 		break;
2902 	case SIOCGPRIVATE_0:
2903 		/*
2904 		 * Although we do not support this ioctl command, it's
2905 		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
2906 		 * handler. Do not print a message in this case
2907 		 */
2908 		return (ENOTSUP);
2909 	default:
2910 		/*
2911 		 * If we get a different command for this function, it's
2912 		 * definitely unexpected, so log a message indicating what
2913 		 * command we got for debugging purposes.
2914 		 */
2915 		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
2916 			      __func__, command);
2917 		return (EINVAL);
2918 	}
2919 
2920 	ifd = (struct ifdrv *)data;
2921 
2922 	switch (ifd->ifd_cmd) {
2923 	case ICE_NVM_ACCESS:
2924 		return ice_handle_nvm_access_ioctl(sc, ifd);
2925 	default:
2926 		return EINVAL;
2927 	}
2928 }
2929 
2930 /**
2931  * ice_if_i2c_req - I2C request handler for iflib
2932  * @ctx: iflib context pointer
2933  * @req: The I2C parameters to use
2934  *
2935  * Read from the port's I2C eeprom using the parameters from the ioctl.
2936  *
2937  * @remark The iflib-only part is pretty simple.
2938  */
2939 static int
2940 ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
2941 {
2942 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2943 
2944 	return ice_handle_i2c_req(sc, req);
2945 }
2946 
2947 /**
2948  * ice_if_suspend - PCI device suspend handler for iflib
2949  * @ctx: iflib context pointer
2950  *
2951  * Deinitializes the driver and clears HW resources in preparation for
2952  * suspend or an FLR.
2953  *
2954  * @returns 0; this return value is ignored
2955  */
2956 static int
2957 ice_if_suspend(if_ctx_t ctx)
2958 {
2959 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2960 
2961 	/* At least a PFR is always going to happen after this;
2962 	 * either via FLR or during the D3->D0 transition.
2963 	 */
2964 	ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
2965 
2966 	ice_prepare_for_reset(sc);
2967 
2968 	return (0);
2969 }
2970 
2971 /**
2972  * ice_if_resume - PCI device resume handler for iflib
2973  * @ctx: iflib context pointer
2974  *
2975  * Reinitializes the driver and the HW after PCI resume or after
2976  * an FLR. An init is performed by iflib after this function is finished.
2977  *
2978  * @returns 0; this return value is ignored
2979  */
2980 static int
2981 ice_if_resume(if_ctx_t ctx)
2982 {
2983 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2984 
2985 	ice_rebuild(sc);
2986 
2987 	return (0);
2988 }
2989 
2990