xref: /freebsd/sys/dev/ice/if_ice_iflib.c (revision 5c4aa6257210502c93ad65882a8a4842d984bae2)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2021, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 /*$FreeBSD$*/
32 
33 /**
34  * @file if_ice_iflib.c
35  * @brief iflib driver implementation
36  *
37  * Contains the main entry point for the iflib driver implementation. It
38  * implements the various ifdi driver methods, and sets up the module and
39  * driver values to load an iflib driver.
40  */
41 
42 #include "ice_iflib.h"
43 #include "ice_drv_info.h"
44 #include "ice_switch.h"
45 #include "ice_sched.h"
46 
47 #include <sys/module.h>
48 #include <sys/sockio.h>
49 #include <sys/smp.h>
50 #include <dev/pci/pcivar.h>
51 #include <dev/pci/pcireg.h>
52 
53 /*
54  * Device method prototypes
55  */
56 
57 static void *ice_register(device_t);
58 static int  ice_if_attach_pre(if_ctx_t);
59 static int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
60 static int  ice_if_attach_post(if_ctx_t);
61 static void ice_attach_post_recovery_mode(struct ice_softc *sc);
62 static int  ice_if_detach(if_ctx_t);
63 static int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
64 static int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets);
65 static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
66 static void ice_if_queues_free(if_ctx_t ctx);
67 static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
68 static void ice_if_intr_enable(if_ctx_t ctx);
69 static void ice_if_intr_disable(if_ctx_t ctx);
70 static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
71 static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
72 static int ice_if_promisc_set(if_ctx_t ctx, int flags);
73 static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
74 static int ice_if_media_change(if_ctx_t ctx);
75 static void ice_if_init(if_ctx_t ctx);
76 static void ice_if_timer(if_ctx_t ctx, uint16_t qid);
77 static void ice_if_update_admin_status(if_ctx_t ctx);
78 static void ice_if_multi_set(if_ctx_t ctx);
79 static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
80 static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
81 static void ice_if_stop(if_ctx_t ctx);
82 static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
83 static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
84 static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
85 static int ice_if_suspend(if_ctx_t ctx);
86 static int ice_if_resume(if_ctx_t ctx);
87 
88 static int ice_msix_que(void *arg);
89 static int ice_msix_admin(void *arg);
90 
91 /*
92  * Helper function prototypes
93  */
94 static int ice_pci_mapping(struct ice_softc *sc);
95 static void ice_free_pci_mapping(struct ice_softc *sc);
96 static void ice_update_link_status(struct ice_softc *sc, bool update_media);
97 static void ice_init_device_features(struct ice_softc *sc);
98 static void ice_init_tx_tracking(struct ice_vsi *vsi);
99 static void ice_handle_reset_event(struct ice_softc *sc);
100 static void ice_handle_pf_reset_request(struct ice_softc *sc);
101 static void ice_prepare_for_reset(struct ice_softc *sc);
102 static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
103 static void ice_rebuild(struct ice_softc *sc);
104 static void ice_rebuild_recovery_mode(struct ice_softc *sc);
105 static void ice_free_irqvs(struct ice_softc *sc);
106 static void ice_update_rx_mbuf_sz(struct ice_softc *sc);
107 static void ice_poll_for_media_avail(struct ice_softc *sc);
108 static void ice_setup_scctx(struct ice_softc *sc);
109 static int ice_allocate_msix(struct ice_softc *sc);
110 static void ice_admin_timer(void *arg);
111 static void ice_transition_recovery_mode(struct ice_softc *sc);
112 static void ice_transition_safe_mode(struct ice_softc *sc);
113 
114 /*
115  * Device Interface Declaration
116  */
117 
118 /**
119  * @var ice_methods
120  * @brief ice driver method entry points
121  *
122  * List of device methods implementing the generic device interface used by
123  * the device stack to interact with the ice driver. Since this is an iflib
124  * driver, most of the methods point to the generic iflib implementation.
125  */
126 static device_method_t ice_methods[] = {
127 	/* Device interface */
128 	DEVMETHOD(device_register, ice_register),
129 	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
130 	DEVMETHOD(device_attach,   iflib_device_attach),
131 	DEVMETHOD(device_detach,   iflib_device_detach),
132 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
133 	DEVMETHOD(device_suspend,  iflib_device_suspend),
134 	DEVMETHOD(device_resume,   iflib_device_resume),
135 	DEVMETHOD_END
136 };
137 
138 /**
139  * @var ice_iflib_methods
140  * @brief iflib method entry points
141  *
142  * List of device methods used by the iflib stack to interact with this
143  * driver. These are the real main entry points used to interact with this
144  * driver.
145  */
146 static device_method_t ice_iflib_methods[] = {
147 	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
148 	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
149 	DEVMETHOD(ifdi_detach, ice_if_detach),
150 	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
151 	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
152 	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
153 	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
154 	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
155 	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
156 	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
157 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
158 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
159 	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
160 	DEVMETHOD(ifdi_media_status, ice_if_media_status),
161 	DEVMETHOD(ifdi_media_change, ice_if_media_change),
162 	DEVMETHOD(ifdi_init, ice_if_init),
163 	DEVMETHOD(ifdi_stop, ice_if_stop),
164 	DEVMETHOD(ifdi_timer, ice_if_timer),
165 	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
166 	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
167 	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
168 	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
169 	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
170 	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
171 	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
172 	DEVMETHOD(ifdi_suspend, ice_if_suspend),
173 	DEVMETHOD(ifdi_resume, ice_if_resume),
174 	DEVMETHOD_END
175 };
176 
177 /**
178  * @var ice_driver
179  * @brief driver structure for the generic device stack
180  *
181  * driver_t definition used to setup the generic device methods.
182  */
183 static driver_t ice_driver = {
184 	.name = "ice",
185 	.methods = ice_methods,
186 	.size = sizeof(struct ice_softc),
187 };
188 
189 /**
190  * @var ice_iflib_driver
191  * @brief driver structure for the iflib stack
192  *
193  * driver_t definition used to setup the iflib device methods.
194  */
195 static driver_t ice_iflib_driver = {
196 	.name = "ice",
197 	.methods = ice_iflib_methods,
198 	.size = sizeof(struct ice_softc),
199 };
200 
201 extern struct if_txrx ice_txrx;
202 extern struct if_txrx ice_recovery_txrx;
203 
204 /**
205  * @var ice_sctx
206  * @brief ice driver shared context
207  *
208  * Structure defining shared values (context) that is used by all instances of
209  * the device. Primarily used to setup details about how the iflib stack
210  * should treat this driver. Also defines the default, minimum, and maximum
211  * number of descriptors in each ring.
212  */
213 static struct if_shared_ctx ice_sctx = {
214 	.isc_magic = IFLIB_MAGIC,
215 	.isc_q_align = PAGE_SIZE,
216 
217 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
218 	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
219 	 * that doesn't make sense since that would be larger than the maximum
220 	 * size of a single packet.
221 	 */
222 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
223 
224 	/* XXX: This is only used by iflib to ensure that
225 	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
226 	 */
227 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
228 	/* XXX: This is used by iflib to set the number of segments in the TSO
229 	 * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the
230 	 * related ifnet parameter.
231 	 */
232 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
233 
234 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
235 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
236 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
237 
238 	.isc_nfl = 1,
239 	.isc_ntxqs = 1,
240 	.isc_nrxqs = 1,
241 
242 	.isc_admin_intrcnt = 1,
243 	.isc_vendor_info = ice_vendor_info_array,
244 	.isc_driver_version = __DECONST(char *, ice_driver_version),
245 	.isc_driver = &ice_iflib_driver,
246 
247 	/*
248 	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
249 	 * for hardware checksum offload
250 	 *
251 	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
252 	 * IP sum field, required by our hardware to calculate valid TSO
253 	 * checksums.
254 	 *
255 	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
256 	 * even when the interface is down.
257 	 *
258 	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
259 	 * vectors manually instead of relying on iflib code to do this.
260 	 */
261 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
262 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,
263 
264 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
265 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
266 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
267 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
268 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
269 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
270 };
271 
272 /**
273  * @var ice_devclass
274  * @brief ice driver device class
275  *
276  * device class used to setup the ice driver module kobject class.
277  */
278 devclass_t ice_devclass;
279 DRIVER_MODULE(ice, pci, ice_driver, ice_devclass, ice_module_event_handler, 0);
280 
281 MODULE_VERSION(ice, 1);
282 MODULE_DEPEND(ice, pci, 1, 1, 1);
283 MODULE_DEPEND(ice, ether, 1, 1, 1);
284 MODULE_DEPEND(ice, iflib, 1, 1, 1);
285 
286 IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);
287 
288 /* Static driver-wide sysctls */
289 #include "ice_iflib_sysctls.h"
290 
291 /**
292  * ice_pci_mapping - Map PCI BAR memory
293  * @sc: device private softc
294  *
295  * Map PCI BAR 0 for device operation.
296  */
297 static int
298 ice_pci_mapping(struct ice_softc *sc)
299 {
300 	int rc;
301 
302 	/* Map BAR0 */
303 	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
304 	if (rc)
305 		return rc;
306 
307 	return 0;
308 }
309 
310 /**
311  * ice_free_pci_mapping - Release PCI BAR memory
312  * @sc: device private softc
313  *
314  * Release PCI BARs which were previously mapped by ice_pci_mapping().
315  */
316 static void
317 ice_free_pci_mapping(struct ice_softc *sc)
318 {
319 	/* Free BAR0 */
320 	ice_free_bar(sc->dev, &sc->bar0);
321 }
322 
323 /*
324  * Device methods
325  */
326 
327 /**
328  * ice_register - register device method callback
329  * @dev: the device being registered
330  *
331  * Returns a pointer to the shared context structure, which is used by iflib.
332  */
333 static void *
334 ice_register(device_t dev __unused)
335 {
336 	return &ice_sctx;
337 } /* ice_register */
338 
339 /**
340  * ice_setup_scctx - Setup the iflib softc context structure
341  * @sc: the device private structure
342  *
343  * Setup the parameters in if_softc_ctx_t structure used by the iflib stack
344  * when loading.
345  */
346 static void
347 ice_setup_scctx(struct ice_softc *sc)
348 {
349 	if_softc_ctx_t scctx = sc->scctx;
350 	struct ice_hw *hw = &sc->hw;
351 	bool safe_mode, recovery_mode;
352 
353 	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
354 	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);
355 
356 	/*
357 	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
358 	 * a single queue pair.
359 	 */
360 	if (safe_mode || recovery_mode) {
361 		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
362 		scctx->isc_ntxqsets_max = 1;
363 		scctx->isc_nrxqsets_max = 1;
364 	} else {
365 		/*
366 		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
367 		 * the values of the override sysctls. Cache these initial
368 		 * values so that the driver can be aware of what the iflib
369 		 * sysctl value is when setting up MSI-X vectors.
370 		 */
371 		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
372 		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;
373 
374 		if (scctx->isc_ntxqsets == 0)
375 			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
376 		if (scctx->isc_nrxqsets == 0)
377 			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;
378 
379 		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
380 		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;
381 
382 		/*
383 		 * Sanity check that the iflib sysctl values are within the
384 		 * maximum supported range.
385 		 */
386 		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
387 			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
388 		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
389 			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
390 	}
391 
392 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
393 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
394 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
395 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
396 
397 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
398 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
399 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
400 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
401 
402 	scctx->isc_msix_bar = PCIR_BAR(ICE_MSIX_BAR);
403 	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;
404 
405 	/*
406 	 * If the driver loads in recovery mode, disable Tx/Rx functionality
407 	 */
408 	if (recovery_mode)
409 		scctx->isc_txrx = &ice_recovery_txrx;
410 	else
411 		scctx->isc_txrx = &ice_txrx;
412 
413 	/*
414 	 * If the driver loads in Safe mode or Recovery mode, disable
415 	 * advanced features including hardware offloads.
416 	 */
417 	if (safe_mode || recovery_mode) {
418 		scctx->isc_capenable = ICE_SAFE_CAPS;
419 		scctx->isc_tx_csum_flags = 0;
420 	} else {
421 		scctx->isc_capenable = ICE_FULL_CAPS;
422 		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
423 	}
424 
425 	scctx->isc_capabilities = scctx->isc_capenable;
426 } /* ice_setup_scctx */
427 
428 /**
429  * ice_if_attach_pre - Early device attach logic
430  * @ctx: the iflib context structure
431  *
432  * Called by iflib during the attach process. Earliest main driver entry
433  * point which performs necessary hardware and driver initialization. Called
434  * before the Tx and Rx queues are allocated.
435  */
436 static int
437 ice_if_attach_pre(if_ctx_t ctx)
438 {
439 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
440 	enum ice_fw_modes fw_mode;
441 	enum ice_status status;
442 	if_softc_ctx_t scctx;
443 	struct ice_hw *hw;
444 	device_t dev;
445 	int err;
446 
447 	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");
448 
449 	ice_set_state(&sc->state, ICE_STATE_ATTACHING);
450 
451 	sc->ctx = ctx;
452 	sc->media = iflib_get_media(ctx);
453 	sc->sctx = iflib_get_sctx(ctx);
454 	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);
455 
456 	dev = sc->dev = iflib_get_dev(ctx);
457 	scctx = sc->scctx = iflib_get_softc_ctx(ctx);
458 
459 	hw = &sc->hw;
460 	hw->back = sc;
461 
462 	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
463 		 "%s:admin", device_get_nameunit(dev));
464 	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
465 	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);
466 
467 	ASSERT_CTX_LOCKED(sc);
468 
469 	if (ice_pci_mapping(sc)) {
470 		err = (ENXIO);
471 		goto destroy_admin_timer;
472 	}
473 
474 	/* Save off the PCI information */
475 	ice_save_pci_info(hw, dev);
476 
477 	/* create tunables as early as possible */
478 	ice_add_device_tunables(sc);
479 
480 	/* Setup ControlQ lengths */
481 	ice_set_ctrlq_len(hw);
482 
483 	fw_mode = ice_get_fw_mode(hw);
484 	if (fw_mode == ICE_FW_MODE_REC) {
485 		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
486 
487 		err = ice_attach_pre_recovery_mode(sc);
488 		if (err)
489 			goto free_pci_mapping;
490 
491 		return (0);
492 	}
493 
494 	/* Initialize the hw data structure */
495 	status = ice_init_hw(hw);
496 	if (status) {
497 		if (status == ICE_ERR_FW_API_VER) {
498 			/* Enter recovery mode, so that the driver remains
499 			 * loaded. This way, if the system administrator
500 			 * cannot update the driver, they may still attempt to
501 			 * downgrade the NVM.
502 			 */
503 			err = ice_attach_pre_recovery_mode(sc);
504 			if (err)
505 				goto free_pci_mapping;
506 
507 			return (0);
508 		} else {
509 			err = EIO;
510 			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
511 				      ice_status_str(status),
512 				      ice_aq_str(hw->adminq.sq_last_status));
513 		}
514 		goto free_pci_mapping;
515 	}
516 
517 	/* Notify firmware of the device driver version */
518 	err = ice_send_version(sc);
519 	if (err)
520 		goto deinit_hw;
521 
522 	ice_load_pkg_file(sc);
523 
524 	err = ice_init_link_events(sc);
525 	if (err) {
526 		device_printf(dev, "ice_init_link_events failed: %s\n",
527 			      ice_err_str(err));
528 		goto deinit_hw;
529 	}
530 
531 	ice_print_nvm_version(sc);
532 
533 	ice_init_device_features(sc);
534 
535 	/* Setup the MAC address */
536 	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);
537 
538 	/* Setup the iflib softc context structure */
539 	ice_setup_scctx(sc);
540 
541 	/* Initialize the Tx queue manager */
542 	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
543 	if (err) {
544 		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
545 			      ice_err_str(err));
546 		goto deinit_hw;
547 	}
548 
549 	/* Initialize the Rx queue manager */
550 	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
551 	if (err) {
552 		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
553 			      ice_err_str(err));
554 		goto free_tx_qmgr;
555 	}
556 
557 	/* Initialize the interrupt resource manager */
558 	err = ice_alloc_intr_tracking(sc);
559 	if (err)
560 		/* Errors are already printed */
561 		goto free_rx_qmgr;
562 
563 	/* Determine maximum number of VSIs we'll prepare for */
564 	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
565 				    hw->func_caps.guar_num_vsi);
566 
567 	if (!sc->num_available_vsi) {
568 		err = EIO;
569 		device_printf(dev, "No VSIs allocated to host\n");
570 		goto free_intr_tracking;
571 	}
572 
573 	/* Allocate storage for the VSI pointers */
574 	sc->all_vsi = (struct ice_vsi **)
575 		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
576 		       M_ICE, M_WAITOK | M_ZERO);
577 	if (!sc->all_vsi) {
578 		err = ENOMEM;
579 		device_printf(dev, "Unable to allocate VSI array\n");
580 		goto free_intr_tracking;
581 	}
582 
583 	/*
584 	 * Prepare the statically allocated primary PF VSI in the softc
585 	 * structure. Other VSIs will be dynamically allocated as needed.
586 	 */
587 	ice_setup_pf_vsi(sc);
588 
589 	err = ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
590 	    scctx->isc_nrxqsets_max);
591 	if (err) {
592 		device_printf(dev, "Unable to allocate VSI Queue maps\n");
593 		goto free_main_vsi;
594 	}
595 
596 	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
597 	err = ice_allocate_msix(sc);
598 	if (err)
599 		goto free_main_vsi;
600 
601 	return 0;
602 
603 free_main_vsi:
604 	/* ice_release_vsi will free the queue maps if they were allocated */
605 	ice_release_vsi(&sc->pf_vsi);
606 	free(sc->all_vsi, M_ICE);
607 	sc->all_vsi = NULL;
608 free_intr_tracking:
609 	ice_free_intr_tracking(sc);
610 free_rx_qmgr:
611 	ice_resmgr_destroy(&sc->rx_qmgr);
612 free_tx_qmgr:
613 	ice_resmgr_destroy(&sc->tx_qmgr);
614 deinit_hw:
615 	ice_deinit_hw(hw);
616 free_pci_mapping:
617 	ice_free_pci_mapping(sc);
618 destroy_admin_timer:
619 	mtx_lock(&sc->admin_mtx);
620 	callout_stop(&sc->admin_timer);
621 	mtx_unlock(&sc->admin_mtx);
622 	mtx_destroy(&sc->admin_mtx);
623 	return err;
624 } /* ice_if_attach_pre */
625 
626 /**
627  * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
628  * @sc: the device private softc
629  *
630  * Loads the device driver in limited Firmware Recovery mode, intended to
631  * allow users to update the firmware to attempt to recover the device.
632  *
633  * @remark We may enter recovery mode in case either (a) the firmware is
634  * detected to be in an invalid state and must be re-programmed, or (b) the
635  * driver detects that the loaded firmware has a non-compatible API version
636  * that the driver cannot operate with.
637  */
638 static int
639 ice_attach_pre_recovery_mode(struct ice_softc *sc)
640 {
641 	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);
642 
643 	/* Setup the iflib softc context */
644 	ice_setup_scctx(sc);
645 
646 	/* Setup the PF VSI back pointer */
647 	sc->pf_vsi.sc = sc;
648 
649 	/*
650 	 * We still need to allocate MSI-X vectors since we need one vector to
651 	 * run the administrative admin interrupt
652 	 */
653 	return ice_allocate_msix(sc);
654 }
655 
656 /**
657  * ice_update_link_status - notify OS of link state change
658  * @sc: device private softc structure
659  * @update_media: true if we should update media even if link didn't change
660  *
661  * Called to notify iflib core of link status changes. Should be called once
662  * during attach_post, and whenever link status changes during runtime.
663  *
664  * This call only updates the currently supported media types if the link
665  * status changed, or if update_media is set to true.
666  */
667 static void
668 ice_update_link_status(struct ice_softc *sc, bool update_media)
669 {
670 	struct ice_hw *hw = &sc->hw;
671 	enum ice_status status;
672 
673 	/* Never report link up when in recovery mode */
674 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
675 		return;
676 
677 	/* Report link status to iflib only once each time it changes */
678 	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
679 		if (sc->link_up) { /* link is up */
680 			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);
681 
682 			ice_set_default_local_lldp_mib(sc);
683 
684 			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
685 
686 			ice_link_up_msg(sc);
687 
688 			update_media = true;
689 		} else { /* link is down */
690 			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
691 
692 			update_media = true;
693 		}
694 	}
695 
696 	/* Update the supported media types */
697 	if (update_media) {
698 		status = ice_add_media_types(sc, sc->media);
699 		if (status)
700 			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
701 				      ice_status_str(status),
702 				      ice_aq_str(hw->adminq.sq_last_status));
703 	}
704 
705 	/* TODO: notify VFs of link state change */
706 }
707 
708 /**
709  * ice_if_attach_post - Late device attach logic
710  * @ctx: the iflib context structure
711  *
712  * Called by iflib to finish up attaching the device. Performs any attach
713  * logic which must wait until after the Tx and Rx queues have been
714  * allocated.
715  */
716 static int
717 ice_if_attach_post(if_ctx_t ctx)
718 {
719 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
720 	if_t ifp = iflib_get_ifp(ctx);
721 	int err;
722 
723 	ASSERT_CTX_LOCKED(sc);
724 
725 	/* We don't yet support loading if MSI-X is not supported */
726 	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
727 		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
728 		return (ENOTSUP);
729 	}
730 
731 	/* The ifnet structure hasn't yet been initialized when the attach_pre
732 	 * handler is called, so wait until attach_post to setup the
733 	 * isc_max_frame_size.
734 	 */
735 
736 	sc->ifp = ifp;
737 	sc->scctx->isc_max_frame_size = ifp->if_mtu +
738 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
739 
740 	/*
741 	 * If we are in recovery mode, only perform a limited subset of
742 	 * initialization to support NVM recovery.
743 	 */
744 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
745 		ice_attach_post_recovery_mode(sc);
746 		return (0);
747 	}
748 
749 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
750 
751 	err = ice_initialize_vsi(&sc->pf_vsi);
752 	if (err) {
753 		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
754 			      ice_err_str(err));
755 		return err;
756 	}
757 
758 	/* Enable FW health event reporting */
759 	ice_init_health_events(sc);
760 
761 	/* Configure the main PF VSI for RSS */
762 	err = ice_config_rss(&sc->pf_vsi);
763 	if (err) {
764 		device_printf(sc->dev,
765 			      "Unable to configure RSS for the main VSI, err %s\n",
766 			      ice_err_str(err));
767 		return err;
768 	}
769 
770 	/* Configure switch to drop transmitted LLDP and PAUSE frames */
771 	err = ice_cfg_pf_ethertype_filters(sc);
772 	if (err)
773 		return err;
774 
775 	ice_get_and_print_bus_info(sc);
776 
777 	ice_set_link_management_mode(sc);
778 
779 	ice_init_saved_phy_cfg(sc);
780 
781 	ice_cfg_pba_num(sc);
782 
783 	ice_add_device_sysctls(sc);
784 
785 	/* Get DCBX/LLDP state and start DCBX agent */
786 	ice_init_dcb_setup(sc);
787 
788 	/* Setup link configuration parameters */
789 	ice_init_link_configuration(sc);
790 	ice_update_link_status(sc, true);
791 
792 	/* Configure interrupt causes for the administrative interrupt */
793 	ice_configure_misc_interrupts(sc);
794 
795 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
796 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
797 
798 	/* Start the admin timer */
799 	mtx_lock(&sc->admin_mtx);
800 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
801 	mtx_unlock(&sc->admin_mtx);
802 
803 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
804 
805 	return 0;
806 } /* ice_if_attach_post */
807 
808 /**
809  * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
810  * @sc: the device private softc
811  *
812  * Performs minimal work to prepare the driver to recover an NVM in case the
813  * firmware is in recovery mode.
814  */
815 static void
816 ice_attach_post_recovery_mode(struct ice_softc *sc)
817 {
818 	/* Configure interrupt causes for the administrative interrupt */
819 	ice_configure_misc_interrupts(sc);
820 
821 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
822 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
823 
824 	/* Start the admin timer */
825 	mtx_lock(&sc->admin_mtx);
826 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
827 	mtx_unlock(&sc->admin_mtx);
828 
829 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
830 }
831 
832 /**
833  * ice_free_irqvs - Free IRQ vector memory
834  * @sc: the device private softc structure
835  *
836  * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
837  */
838 static void
839 ice_free_irqvs(struct ice_softc *sc)
840 {
841 	struct ice_vsi *vsi = &sc->pf_vsi;
842 	if_ctx_t ctx = sc->ctx;
843 	int i;
844 
845 	/* If the irqvs array is NULL, then there are no vectors to free */
846 	if (sc->irqvs == NULL)
847 		return;
848 
849 	/* Free the IRQ vectors */
850 	for (i = 0; i < sc->num_irq_vectors; i++)
851 		iflib_irq_free(ctx, &sc->irqvs[i].irq);
852 
853 	/* Clear the irqv pointers */
854 	for (i = 0; i < vsi->num_rx_queues; i++)
855 		vsi->rx_queues[i].irqv = NULL;
856 
857 	for (i = 0; i < vsi->num_tx_queues; i++)
858 		vsi->tx_queues[i].irqv = NULL;
859 
860 	/* Release the vector array memory */
861 	free(sc->irqvs, M_ICE);
862 	sc->irqvs = NULL;
863 	sc->num_irq_vectors = 0;
864 }
865 
866 /**
867  * ice_if_detach - Device driver detach logic
868  * @ctx: iflib context structure
869  *
870  * Perform device shutdown logic to detach the device driver.
871  *
872  * Note that there is no guarantee of the ordering of ice_if_queues_free() and
873  * ice_if_detach(). It is possible for the functions to be called in either
874  * order, and they must not assume to have a strict ordering.
875  */
876 static int
877 ice_if_detach(if_ctx_t ctx)
878 {
879 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
880 	struct ice_vsi *vsi = &sc->pf_vsi;
881 	int i;
882 
883 	ASSERT_CTX_LOCKED(sc);
884 
885 	/* Indicate that we're detaching */
886 	ice_set_state(&sc->state, ICE_STATE_DETACHING);
887 
888 	/* Stop the admin timer */
889 	mtx_lock(&sc->admin_mtx);
890 	callout_stop(&sc->admin_timer);
891 	mtx_unlock(&sc->admin_mtx);
892 	mtx_destroy(&sc->admin_mtx);
893 
894 	/* Free allocated media types */
895 	ifmedia_removeall(sc->media);
896 
897 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
898 	 * pointers. Note, the calls here and those in ice_if_queues_free()
899 	 * are *BOTH* necessary, as we cannot guarantee which path will be
900 	 * run first
901 	 */
902 	ice_vsi_del_txqs_ctx(vsi);
903 	ice_vsi_del_rxqs_ctx(vsi);
904 
905 	/* Release MSI-X resources */
906 	ice_free_irqvs(sc);
907 
908 	for (i = 0; i < sc->num_available_vsi; i++) {
909 		if (sc->all_vsi[i])
910 			ice_release_vsi(sc->all_vsi[i]);
911 	}
912 
913 	if (sc->all_vsi) {
914 		free(sc->all_vsi, M_ICE);
915 		sc->all_vsi = NULL;
916 	}
917 
918 	/* Release MSI-X memory */
919 	pci_release_msi(sc->dev);
920 
921 	if (sc->msix_table != NULL) {
922 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
923 				     rman_get_rid(sc->msix_table),
924 				     sc->msix_table);
925 		sc->msix_table = NULL;
926 	}
927 
928 	ice_free_intr_tracking(sc);
929 
930 	/* Destroy the queue managers */
931 	ice_resmgr_destroy(&sc->tx_qmgr);
932 	ice_resmgr_destroy(&sc->rx_qmgr);
933 
934 	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
935 		ice_deinit_hw(&sc->hw);
936 
937 	ice_free_pci_mapping(sc);
938 
939 	return 0;
940 } /* ice_if_detach */
941 
942 /**
943  * ice_if_tx_queues_alloc - Allocate Tx queue memory
944  * @ctx: iflib context structure
945  * @vaddrs: virtual addresses for the queue memory
946  * @paddrs: physical addresses for the queue memory
947  * @ntxqs: the number of Tx queues per set (should always be 1)
948  * @ntxqsets: the number of Tx queue sets to allocate
949  *
950  * Called by iflib to allocate Tx queues for the device. Allocates driver
951  * memory to track each queue, the status arrays used for descriptor
952  * status reporting, and Tx queue sysctls.
953  */
954 static int
955 ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
956 		       int __invariant_only ntxqs, int ntxqsets)
957 {
958 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
959 	struct ice_vsi *vsi = &sc->pf_vsi;
960 	struct ice_tx_queue *txq;
961 	int err, i, j;
962 
963 	MPASS(ntxqs == 1);
964 	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
965 	ASSERT_CTX_LOCKED(sc);
966 
967 	/* Do not bother allocating queues if we're in recovery mode */
968 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
969 		return (0);
970 
971 	/* Allocate queue structure memory */
972 	if (!(vsi->tx_queues =
973 	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_WAITOK | M_ZERO))) {
974 		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
975 		return (ENOMEM);
976 	}
977 
978 	/* Allocate report status arrays */
979 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
980 		if (!(txq->tx_rsq =
981 		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_WAITOK))) {
982 			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
983 			err = ENOMEM;
984 			goto free_tx_queues;
985 		}
986 		/* Initialize report status array */
987 		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
988 			txq->tx_rsq[j] = QIDX_INVALID;
989 	}
990 
991 	/* Assign queues from PF space to the main VSI */
992 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
993 	if (err) {
994 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
995 			      ice_err_str(err));
996 		goto free_tx_queues;
997 	}
998 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
999 
1000 	/* Add Tx queue sysctls context */
1001 	ice_vsi_add_txqs_ctx(vsi);
1002 
1003 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1004 		/* q_handle == me when only one TC */
1005 		txq->me = txq->q_handle = i;
1006 		txq->vsi = vsi;
1007 
1008 		/* store the queue size for easier access */
1009 		txq->desc_count = sc->scctx->isc_ntxd[0];
1010 
1011 		/* get the virtual and physical address of the hardware queues */
1012 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
1013 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
1014 		txq->tx_paddr = paddrs[i];
1015 
1016 		ice_add_txq_sysctls(txq);
1017 	}
1018 
1019 	vsi->num_tx_queues = ntxqsets;
1020 
1021 	return (0);
1022 
1023 free_tx_queues:
1024 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1025 		if (txq->tx_rsq != NULL) {
1026 			free(txq->tx_rsq, M_ICE);
1027 			txq->tx_rsq = NULL;
1028 		}
1029 	}
1030 	free(vsi->tx_queues, M_ICE);
1031 	vsi->tx_queues = NULL;
1032 	return err;
1033 }
1034 
1035 /**
1036  * ice_if_rx_queues_alloc - Allocate Rx queue memory
1037  * @ctx: iflib context structure
1038  * @vaddrs: virtual addresses for the queue memory
1039  * @paddrs: physical addresses for the queue memory
1040  * @nrxqs: number of Rx queues per set (should always be 1)
1041  * @nrxqsets: number of Rx queue sets to allocate
1042  *
1043  * Called by iflib to allocate Rx queues for the device. Allocates driver
1044  * memory to track each queue, as well as sets up the Rx queue sysctls.
1045  */
1046 static int
1047 ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1048 		       int __invariant_only nrxqs, int nrxqsets)
1049 {
1050 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1051 	struct ice_vsi *vsi = &sc->pf_vsi;
1052 	struct ice_rx_queue *rxq;
1053 	int err, i;
1054 
1055 	MPASS(nrxqs == 1);
1056 	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
1057 	ASSERT_CTX_LOCKED(sc);
1058 
1059 	/* Do not bother allocating queues if we're in recovery mode */
1060 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1061 		return (0);
1062 
1063 	/* Allocate queue structure memory */
1064 	if (!(vsi->rx_queues =
1065 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_WAITOK | M_ZERO))) {
1066 		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
1067 		return (ENOMEM);
1068 	}
1069 
1070 	/* Assign queues from PF space to the main VSI */
1071 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
1072 	if (err) {
1073 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1074 			      ice_err_str(err));
1075 		goto free_rx_queues;
1076 	}
1077 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1078 
1079 	/* Add Rx queue sysctls context */
1080 	ice_vsi_add_rxqs_ctx(vsi);
1081 
1082 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
1083 		rxq->me = i;
1084 		rxq->vsi = vsi;
1085 
1086 		/* store the queue size for easier access */
1087 		rxq->desc_count = sc->scctx->isc_nrxd[0];
1088 
1089 		/* get the virtual and physical address of the hardware queues */
1090 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
1091 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
1092 		rxq->rx_paddr = paddrs[i];
1093 
1094 		ice_add_rxq_sysctls(rxq);
1095 	}
1096 
1097 	vsi->num_rx_queues = nrxqsets;
1098 
1099 	return (0);
1100 
1101 free_rx_queues:
1102 	free(vsi->rx_queues, M_ICE);
1103 	vsi->rx_queues = NULL;
1104 	return err;
1105 }
1106 
1107 /**
1108  * ice_if_queues_free - Free queue memory
1109  * @ctx: the iflib context structure
1110  *
1111  * Free queue memory allocated by ice_if_tx_queues_alloc() and
1112  * ice_if_rx_queues_alloc().
1113  *
1114  * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
1115  * called in the same order. It's possible for ice_if_queues_free() to be
1116  * called prior to ice_if_detach(), and vice versa.
1117  *
1118  * For this reason, the main VSI is a static member of the ice_softc, which is
1119  * not free'd until after iflib finishes calling both of these functions.
1120  *
1121  * Thus, care must be taken in how we manage the memory being freed by this
1122  * function, and in what tasks it can and must perform.
1123  */
1124 static void
1125 ice_if_queues_free(if_ctx_t ctx)
1126 {
1127 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1128 	struct ice_vsi *vsi = &sc->pf_vsi;
1129 	struct ice_tx_queue *txq;
1130 	int i;
1131 
1132 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
1133 	 * pointers. Note, the calls here and those in ice_if_detach()
1134 	 * are *BOTH* necessary, as we cannot guarantee which path will be
1135 	 * run first
1136 	 */
1137 	ice_vsi_del_txqs_ctx(vsi);
1138 	ice_vsi_del_rxqs_ctx(vsi);
1139 
1140 	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
1141 	ice_free_irqvs(sc);
1142 
1143 	if (vsi->tx_queues != NULL) {
1144 		/* free the tx_rsq arrays */
1145 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1146 			if (txq->tx_rsq != NULL) {
1147 				free(txq->tx_rsq, M_ICE);
1148 				txq->tx_rsq = NULL;
1149 			}
1150 		}
1151 		free(vsi->tx_queues, M_ICE);
1152 		vsi->tx_queues = NULL;
1153 		vsi->num_tx_queues = 0;
1154 	}
1155 	if (vsi->rx_queues != NULL) {
1156 		free(vsi->rx_queues, M_ICE);
1157 		vsi->rx_queues = NULL;
1158 		vsi->num_rx_queues = 0;
1159 	}
1160 }
1161 
1162 /**
1163  * ice_msix_que - Fast interrupt handler for MSI-X receive queues
1164  * @arg: The Rx queue memory
1165  *
1166  * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
1167  * an MSI-X interrupt for a given queue is triggered. Currently this just asks
1168  * iflib to schedule the main Rx thread.
1169  */
1170 static int
1171 ice_msix_que(void *arg)
1172 {
1173 	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;
1174 
1175 	/* TODO: dynamic ITR algorithm?? */
1176 
1177 	return (FILTER_SCHEDULE_THREAD);
1178 }
1179 
1180 /**
1181  * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
1182  * @arg: pointer to device softc memory
1183  *
1184  * Called by iflib when an administrative interrupt occurs. Should perform any
1185  * fast logic for handling the interrupt cause, and then indicate whether the
1186  * admin task needs to be queued.
1187  */
1188 static int
1189 ice_msix_admin(void *arg)
1190 {
1191 	struct ice_softc *sc = (struct ice_softc *)arg;
1192 	struct ice_hw *hw = &sc->hw;
1193 	device_t dev = sc->dev;
1194 	u32 oicr;
1195 
1196 	/* There is no safe way to modify the enabled miscellaneous causes of
1197 	 * the OICR vector at runtime, as doing so would be prone to race
1198 	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
1199 	 * causes and allow future interrupts to occur. The admin interrupt
1200 	 * vector will not be re-enabled until after we exit this function,
1201 	 * but any delayed tasks must be resilient against possible "late
1202 	 * arrival" interrupts that occur while we're already handling the
1203 	 * task. This is done by using state bits and serializing these
1204 	 * delayed tasks via the admin status task function.
1205 	 */
1206 	oicr = rd32(hw, PFINT_OICR);
1207 
1208 	/* Processing multiple controlq interrupts on a single vector does not
1209 	 * provide an indication of which controlq triggered the interrupt.
1210 	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
1211 	 * registers. However, the INTEVENT bit is not guaranteed to be set as
1212 	 * it gets automatically cleared when the hardware acknowledges the
1213 	 * interrupt.
1214 	 *
1215 	 * This means we don't really have a good indication of whether or
1216 	 * which controlq triggered this interrupt. We'll just notify the
1217 	 * admin task that it should check all the controlqs.
1218 	 */
1219 	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
1220 
1221 	if (oicr & PFINT_OICR_VFLR_M) {
1222 		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
1223 	}
1224 
1225 	if (oicr & PFINT_OICR_MAL_DETECT_M) {
1226 		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
1227 	}
1228 
1229 	if (oicr & PFINT_OICR_GRST_M) {
1230 		u32 reset;
1231 
1232 		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
1233 			GLGEN_RSTAT_RESET_TYPE_S;
1234 
1235 		if (reset == ICE_RESET_CORER)
1236 			sc->soft_stats.corer_count++;
1237 		else if (reset == ICE_RESET_GLOBR)
1238 			sc->soft_stats.globr_count++;
1239 		else
1240 			sc->soft_stats.empr_count++;
1241 
1242 		/* There are a couple of bits at play for handling resets.
1243 		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
1244 		 * indicate that the driver has received an OICR with a reset
1245 		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
1246 		 * happen. Second, we set hw->reset_ongoing to indicate that
1247 		 * the hardware is in reset. We will set this back to false as
1248 		 * soon as the driver has determined that the hardware is out
1249 		 * of reset.
1250 		 *
1251 		 * If the driver wishes to trigger a reqest, it can set one of
1252 		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
1253 		 * correct type of reset.
1254 		 */
1255 		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
1256 			hw->reset_ongoing = true;
1257 	}
1258 
1259 	if (oicr & PFINT_OICR_ECC_ERR_M) {
1260 		device_printf(dev, "ECC Error detected!\n");
1261 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1262 	}
1263 
1264 	if (oicr & PFINT_OICR_PE_CRITERR_M) {
1265 		device_printf(dev, "Critical Protocol Engine Error detected!\n");
1266 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1267 	}
1268 
1269 	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
1270 		device_printf(dev, "PCI Exception detected!\n");
1271 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1272 	}
1273 
1274 	if (oicr & PFINT_OICR_HMC_ERR_M) {
1275 		/* Log the HMC errors, but don't disable the interrupt cause */
1276 		ice_log_hmc_error(hw, dev);
1277 	}
1278 
1279 	return (FILTER_SCHEDULE_THREAD);
1280 }
1281 
1282 /**
1283  * ice_allocate_msix - Allocate MSI-X vectors for the interface
1284  * @sc: the device private softc
1285  *
1286  * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
1287  *
1288  * First, determine a suitable total number of vectors based on the number
1289  * of CPUs, RSS buckets, the administrative vector, and other demands such as
1290  * RDMA.
1291  *
1292  * Request the desired amount of vectors, and see how many we obtain. If we
1293  * don't obtain as many as desired, reduce the demands by lowering the number
1294  * of requested queues or reducing the demand from other features such as
1295  * RDMA.
1296  *
1297  * @remark This function is required because the driver sets the
1298  * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
1299  * manually.
1300  *
1301  * @remark This driver will only use MSI-X vectors. If this is not possible,
1302  * neither MSI or legacy interrupts will be tried.
1303  *
1304  * @post on success this function must set the following scctx parameters:
1305  * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
1306  *
1307  * @returns zero on success or an error code on failure.
1308  */
1309 static int
1310 ice_allocate_msix(struct ice_softc *sc)
1311 {
1312 	bool iflib_override_queue_count = false;
1313 	if_softc_ctx_t scctx = sc->scctx;
1314 	device_t dev = sc->dev;
1315 	cpuset_t cpus;
1316 	int bar, queues, vectors, requested;
1317 	int err = 0;
1318 
1319 	/* Allocate the MSI-X bar */
1320 	bar = scctx->isc_msix_bar;
1321 	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
1322 	if (!sc->msix_table) {
1323 		device_printf(dev, "Unable to map MSI-X table\n");
1324 		return (ENOMEM);
1325 	}
1326 
1327 	/* Check if the iflib queue count sysctls have been set */
1328 	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
1329 		iflib_override_queue_count = true;
1330 
1331 	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
1332 	if (err) {
1333 		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
1334 			      __func__, ice_err_str(err));
1335 		CPU_COPY(&all_cpus, &cpus);
1336 	}
1337 
1338 	/* Attempt to mimic behavior of iflib_msix_init */
1339 	if (iflib_override_queue_count) {
1340 		/*
1341 		 * If the override sysctls have been set, limit the queues to
1342 		 * the number of logical CPUs.
1343 		 */
1344 		queues = mp_ncpus;
1345 	} else {
1346 		/*
1347 		 * Otherwise, limit the queue count to the CPUs associated
1348 		 * with the NUMA node the device is associated with.
1349 		 */
1350 		queues = CPU_COUNT(&cpus);
1351 	}
1352 
1353 	/* Clamp to the number of RSS buckets */
1354 	queues = imin(queues, rss_getnumbuckets());
1355 
1356 	/*
1357 	 * Clamp the number of queue pairs to the minimum of the requested Tx
1358 	 * and Rx queues.
1359 	 */
1360 	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
1361 	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
1362 
1363 	/*
1364 	 * Determine the number of vectors to request. Note that we also need
1365 	 * to allocate one vector for administrative tasks.
1366 	 */
1367 	requested = queues + 1;
1368 
1369 	vectors = requested;
1370 
1371 	err = pci_alloc_msix(dev, &vectors);
1372 	if (err) {
1373 		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
1374 			      vectors, ice_err_str(err));
1375 		goto err_free_msix_table;
1376 	}
1377 
1378 	/* If we don't receive enough vectors, reduce demands */
1379 	if (vectors < requested) {
1380 		int diff = requested - vectors;
1381 
1382 		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
1383 			      requested, vectors);
1384 
1385 		/*
1386 		 * If we still have a difference, we need to reduce the number
1387 		 * of queue pairs.
1388 		 *
1389 		 * However, we still need at least one vector for the admin
1390 		 * interrupt and one queue pair.
1391 		 */
1392 		if (queues <= diff) {
1393 			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
1394 			err = (ERANGE);
1395 			goto err_pci_release_msi;
1396 		}
1397 
1398 		queues -= diff;
1399 	}
1400 
1401 	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
1402 	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
1403 		      vectors);
1404 
1405 	scctx->isc_vectors = vectors;
1406 	scctx->isc_nrxqsets = queues;
1407 	scctx->isc_ntxqsets = queues;
1408 	scctx->isc_intr = IFLIB_INTR_MSIX;
1409 
1410 	/* Interrupt allocation tracking isn't required in recovery mode,
1411 	 * since neither RDMA nor VFs are enabled.
1412 	 */
1413 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1414 		return (0);
1415 
1416 	/* Keep track of which interrupt indices are being used for what */
1417 	sc->lan_vectors = vectors;
1418 	err = ice_resmgr_assign_contiguous(&sc->imgr, sc->pf_imap, sc->lan_vectors);
1419 	if (err) {
1420 		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
1421 			      ice_err_str(err));
1422 		goto err_pci_release_msi;
1423 	}
1424 
1425 	return (0);
1426 
1427 err_pci_release_msi:
1428 	pci_release_msi(dev);
1429 err_free_msix_table:
1430 	if (sc->msix_table != NULL) {
1431 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
1432 				rman_get_rid(sc->msix_table),
1433 				sc->msix_table);
1434 		sc->msix_table = NULL;
1435 	}
1436 
1437 	return (err);
1438 }
1439 
1440 /**
1441  * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
1442  * @ctx: the iflib context structure
1443  * @msix: the number of vectors we were assigned
1444  *
1445  * Called by iflib to assign MSI-X vectors to queues. Currently requires that
1446  * we get at least the same number of vectors as we have queues, and that we
1447  * always have the same number of Tx and Rx queues.
1448  *
1449  * Tx queues use a softirq instead of using their own hardware interrupt.
1450  */
1451 static int
1452 ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
1453 {
1454 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1455 	struct ice_vsi *vsi = &sc->pf_vsi;
1456 	int err, i, vector;
1457 
1458 	ASSERT_CTX_LOCKED(sc);
1459 
1460 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
1461 		device_printf(sc->dev,
1462 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
1463 			      vsi->num_tx_queues, vsi->num_rx_queues);
1464 		return (EOPNOTSUPP);
1465 	}
1466 
1467 	if (msix < (vsi->num_rx_queues + 1)) {
1468 		device_printf(sc->dev,
1469 			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
1470 		return (EOPNOTSUPP);
1471 	}
1472 
1473 	/* Save the number of vectors for future use */
1474 	sc->num_irq_vectors = vsi->num_rx_queues + 1;
1475 
1476 	/* Allocate space to store the IRQ vector data */
1477 	if (!(sc->irqvs =
1478 	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
1479 					       M_ICE, M_NOWAIT))) {
1480 		device_printf(sc->dev,
1481 			      "Unable to allocate irqv memory\n");
1482 		return (ENOMEM);
1483 	}
1484 
1485 	/* Administrative interrupt events will use vector 0 */
1486 	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
1487 				      ice_msix_admin, sc, 0, "admin");
1488 	if (err) {
1489 		device_printf(sc->dev,
1490 			      "Failed to register Admin queue handler: %s\n",
1491 			      ice_err_str(err));
1492 		goto free_irqvs;
1493 	}
1494 	sc->irqvs[0].me = 0;
1495 
1496 	/* Do not allocate queue interrupts when in recovery mode */
1497 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1498 		return (0);
1499 
1500 	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
1501 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1502 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1503 		int rid = vector + 1;
1504 		char irq_name[16];
1505 
1506 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
1507 		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
1508 					      IFLIB_INTR_RXTX, ice_msix_que,
1509 					      rxq, rxq->me, irq_name);
1510 		if (err) {
1511 			device_printf(sc->dev,
1512 				      "Failed to allocate q int %d err: %s\n",
1513 				      i, ice_err_str(err));
1514 			vector--;
1515 			i--;
1516 			goto fail;
1517 		}
1518 		sc->irqvs[vector].me = vector;
1519 		rxq->irqv = &sc->irqvs[vector];
1520 
1521 		bzero(irq_name, sizeof(irq_name));
1522 
1523 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
1524 		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
1525 					    IFLIB_INTR_TX, txq,
1526 					    txq->me, irq_name);
1527 		txq->irqv = &sc->irqvs[vector];
1528 	}
1529 
1530 	return (0);
1531 fail:
1532 	for (; i >= 0; i--, vector--)
1533 		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
1534 	iflib_irq_free(ctx, &sc->irqvs[0].irq);
1535 free_irqvs:
1536 	free(sc->irqvs, M_ICE);
1537 	sc->irqvs = NULL;
1538 	return err;
1539 }
1540 
1541 /**
1542  * ice_if_mtu_set - Set the device MTU
1543  * @ctx: iflib context structure
1544  * @mtu: the MTU requested
1545  *
1546  * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
1547  *
1548  * @pre assumes the caller holds the iflib CTX lock
1549  */
1550 static int
1551 ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
1552 {
1553 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1554 
1555 	ASSERT_CTX_LOCKED(sc);
1556 
1557 	/* Do not support configuration when in recovery mode */
1558 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1559 		return (ENOSYS);
1560 
1561 	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
1562 		return (EINVAL);
1563 
1564 	sc->scctx->isc_max_frame_size = mtu +
1565 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1566 
1567 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
1568 
1569 	return (0);
1570 }
1571 
1572 /**
1573  * ice_if_intr_enable - Enable device interrupts
1574  * @ctx: iflib context structure
1575  *
1576  * Called by iflib to request enabling device interrupts.
1577  */
1578 static void
1579 ice_if_intr_enable(if_ctx_t ctx)
1580 {
1581 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1582 	struct ice_vsi *vsi = &sc->pf_vsi;
1583 	struct ice_hw *hw = &sc->hw;
1584 
1585 	ASSERT_CTX_LOCKED(sc);
1586 
1587 	/* Enable ITR 0 */
1588 	ice_enable_intr(hw, sc->irqvs[0].me);
1589 
1590 	/* Do not enable queue interrupts in recovery mode */
1591 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1592 		return;
1593 
1594 	/* Enable all queue interrupts */
1595 	for (int i = 0; i < vsi->num_rx_queues; i++)
1596 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
1597 }
1598 
1599 /**
1600  * ice_if_intr_disable - Disable device interrupts
1601  * @ctx: iflib context structure
1602  *
1603  * Called by iflib to request disabling device interrupts.
1604  */
1605 static void
1606 ice_if_intr_disable(if_ctx_t ctx)
1607 {
1608 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1609 	struct ice_hw *hw = &sc->hw;
1610 	unsigned int i;
1611 
1612 	ASSERT_CTX_LOCKED(sc);
1613 
1614 	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
1615 	 * assigned to queues. Instead of assuming that the interrupt
1616 	 * assignment in the rx_queues structure is valid, just disable all
1617 	 * possible interrupts
1618 	 *
1619 	 * Note that we choose not to disable ITR 0 because this handles the
1620 	 * AdminQ interrupts, and we want to keep processing these even when
1621 	 * the interface is offline.
1622 	 */
1623 	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1624 		ice_disable_intr(hw, i);
1625 }
1626 
1627 /**
1628  * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1629  * @ctx: iflib context structure
1630  * @rxqid: the Rx queue to enable
1631  *
1632  * Enable a specific Rx queue interrupt.
1633  *
1634  * This function is not protected by the iflib CTX lock.
1635  */
1636 static int
1637 ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1638 {
1639 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1640 	struct ice_vsi *vsi = &sc->pf_vsi;
1641 	struct ice_hw *hw = &sc->hw;
1642 
1643 	/* Do not enable queue interrupts in recovery mode */
1644 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1645 		return (ENOSYS);
1646 
1647 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1648 	return (0);
1649 }
1650 
1651 /**
1652  * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1653  * @ctx: iflib context structure
1654  * @txqid: the Tx queue to enable
1655  *
1656  * Enable a specific Tx queue interrupt.
1657  *
1658  * This function is not protected by the iflib CTX lock.
1659  */
1660 static int
1661 ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1662 {
1663 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1664 	struct ice_vsi *vsi = &sc->pf_vsi;
1665 	struct ice_hw *hw = &sc->hw;
1666 
1667 	/* Do not enable queue interrupts in recovery mode */
1668 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1669 		return (ENOSYS);
1670 
1671 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1672 	return (0);
1673 }
1674 
1675 /**
1676  * ice_if_promisc_set - Set device promiscuous mode
1677  * @ctx: iflib context structure
1678  * @flags: promiscuous flags to configure
1679  *
1680  * Called by iflib to configure device promiscuous mode.
1681  *
1682  * @remark Calls to this function will always overwrite the previous setting
1683  */
1684 static int
1685 ice_if_promisc_set(if_ctx_t ctx, int flags)
1686 {
1687 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1688 	struct ice_hw *hw = &sc->hw;
1689 	device_t dev = sc->dev;
1690 	enum ice_status status;
1691 	bool promisc_enable = flags & IFF_PROMISC;
1692 	bool multi_enable = flags & IFF_ALLMULTI;
1693 
1694 	/* Do not support configuration when in recovery mode */
1695 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1696 		return (ENOSYS);
1697 
1698 	if (multi_enable)
1699 		return (EOPNOTSUPP);
1700 
1701 	if (promisc_enable) {
1702 		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1703 					     ICE_VSI_PROMISC_MASK, 0);
1704 		if (status && status != ICE_ERR_ALREADY_EXISTS) {
1705 			device_printf(dev,
1706 				      "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n",
1707 				      ice_status_str(status),
1708 				      ice_aq_str(hw->adminq.sq_last_status));
1709 			return (EIO);
1710 		}
1711 	} else {
1712 		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
1713 					       ICE_VSI_PROMISC_MASK, 0);
1714 		if (status) {
1715 			device_printf(dev,
1716 				      "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n",
1717 				      ice_status_str(status),
1718 				      ice_aq_str(hw->adminq.sq_last_status));
1719 			return (EIO);
1720 		}
1721 	}
1722 
1723 	return (0);
1724 }
1725 
1726 /**
1727  * ice_if_media_change - Change device media
1728  * @ctx: device ctx structure
1729  *
1730  * Called by iflib when a media change is requested. This operation is not
1731  * supported by the hardware, so we just return an error code.
1732  */
1733 static int
1734 ice_if_media_change(if_ctx_t ctx)
1735 {
1736 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1737 
1738 	device_printf(sc->dev, "Media change is not supported.\n");
1739 	return (ENODEV);
1740 }
1741 
1742 /**
1743  * ice_if_media_status - Report current device media
1744  * @ctx: iflib context structure
1745  * @ifmr: ifmedia request structure to update
1746  *
1747  * Updates the provided ifmr with current device media status, including link
1748  * status and media type.
1749  */
1750 static void
1751 ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1752 {
1753 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1754 	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;
1755 
1756 	ifmr->ifm_status = IFM_AVALID;
1757 	ifmr->ifm_active = IFM_ETHER;
1758 
1759 	/* Never report link up or media types when in recovery mode */
1760 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1761 		return;
1762 
1763 	if (!sc->link_up)
1764 		return;
1765 
1766 	ifmr->ifm_status |= IFM_ACTIVE;
1767 	ifmr->ifm_active |= IFM_FDX;
1768 
1769 	if (li->phy_type_low)
1770 		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
1771 	else if (li->phy_type_high)
1772 		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
1773 	else
1774 		ifmr->ifm_active |= IFM_UNKNOWN;
1775 
1776 	/* Report flow control status as well */
1777 	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
1778 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1779 	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
1780 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1781 }
1782 
1783 /**
1784  * ice_init_tx_tracking - Initialize Tx queue software tracking values
1785  * @vsi: the VSI to initialize
1786  *
1787  * Initialize Tx queue software tracking values, including the Report Status
1788  * queue, and related software tracking values.
1789  */
1790 static void
1791 ice_init_tx_tracking(struct ice_vsi *vsi)
1792 {
1793 	struct ice_tx_queue *txq;
1794 	size_t j;
1795 	int i;
1796 
1797 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1798 
1799 		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;
1800 
1801 		/* Initialize the last processed descriptor to be the end of
1802 		 * the ring, rather than the start, so that we avoid an
1803 		 * off-by-one error in ice_ift_txd_credits_update for the
1804 		 * first packet.
1805 		 */
1806 		txq->tx_cidx_processed = txq->desc_count - 1;
1807 
1808 		for (j = 0; j < txq->desc_count; j++)
1809 			txq->tx_rsq[j] = QIDX_INVALID;
1810 	}
1811 }
1812 
1813 /**
1814  * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
1815  * @sc: the device softc
1816  *
1817  * Called to update the Rx queue mbuf_sz parameter for configuring the receive
1818  * buffer sizes when programming hardware.
1819  */
1820 static void
1821 ice_update_rx_mbuf_sz(struct ice_softc *sc)
1822 {
1823 	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
1824 	struct ice_vsi *vsi = &sc->pf_vsi;
1825 
1826 	MPASS(mbuf_sz <= UINT16_MAX);
1827 	vsi->mbuf_sz = mbuf_sz;
1828 }
1829 
1830 /**
1831  * ice_if_init - Initialize the device
1832  * @ctx: iflib ctx structure
1833  *
1834  * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
1835  * device filters and prepares the Tx and Rx engines.
1836  *
1837  * @pre assumes the caller holds the iflib CTX lock
1838  */
1839 static void
1840 ice_if_init(if_ctx_t ctx)
1841 {
1842 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1843 	device_t dev = sc->dev;
1844 	int err;
1845 
1846 	ASSERT_CTX_LOCKED(sc);
1847 
1848 	/*
1849 	 * We've seen an issue with 11.3/12.1 where sideband routines are
1850 	 * called after detach is called.  This would call routines after
1851 	 * if_stop, causing issues with the teardown process.  This has
1852 	 * seemingly been fixed in STABLE snapshots, but it seems like a
1853 	 * good idea to have this guard here regardless.
1854 	 */
1855 	if (ice_driver_is_detaching(sc))
1856 		return;
1857 
1858 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1859 		return;
1860 
1861 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
1862 		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
1863 		return;
1864 	}
1865 
1866 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
1867 		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
1868 		return;
1869 	}
1870 
1871 	ice_update_rx_mbuf_sz(sc);
1872 
1873 	/* Update the MAC address... User might use a LAA */
1874 	err = ice_update_laa_mac(sc);
1875 	if (err) {
1876 		device_printf(dev,
1877 			      "LAA address change failed, err %s\n",
1878 			      ice_err_str(err));
1879 		return;
1880 	}
1881 
1882 	/* Initialize software Tx tracking values */
1883 	ice_init_tx_tracking(&sc->pf_vsi);
1884 
1885 	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
1886 	if (err) {
1887 		device_printf(dev,
1888 			      "Unable to configure the main VSI for Tx: %s\n",
1889 			      ice_err_str(err));
1890 		return;
1891 	}
1892 
1893 	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
1894 	if (err) {
1895 		device_printf(dev,
1896 			      "Unable to configure the main VSI for Rx: %s\n",
1897 			      ice_err_str(err));
1898 		goto err_cleanup_tx;
1899 	}
1900 
1901 	err = ice_control_rx_queues(&sc->pf_vsi, true);
1902 	if (err) {
1903 		device_printf(dev,
1904 			      "Unable to enable Rx rings for transmit: %s\n",
1905 			      ice_err_str(err));
1906 		goto err_cleanup_tx;
1907 	}
1908 
1909 	err = ice_cfg_pf_default_mac_filters(sc);
1910 	if (err) {
1911 		device_printf(dev,
1912 			      "Unable to configure default MAC filters: %s\n",
1913 			      ice_err_str(err));
1914 		goto err_stop_rx;
1915 	}
1916 
1917 	/* We use software interrupts for Tx, so we only program the hardware
1918 	 * interrupts for Rx.
1919 	 */
1920 	ice_configure_rxq_interrupts(&sc->pf_vsi);
1921 	ice_configure_rx_itr(&sc->pf_vsi);
1922 
1923 	/* Configure promiscuous mode */
1924 	ice_if_promisc_set(ctx, if_getflags(sc->ifp));
1925 
1926 	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
1927 	return;
1928 
1929 err_stop_rx:
1930 	ice_control_rx_queues(&sc->pf_vsi, false);
1931 err_cleanup_tx:
1932 	ice_vsi_disable_tx(&sc->pf_vsi);
1933 }
1934 
1935 /**
1936  * ice_poll_for_media_avail - Re-enable link if media is detected
1937  * @sc: device private structure
1938  *
1939  * Intended to be called from the driver's timer function, this function
1940  * sends the Get Link Status AQ command and re-enables HW link if the
1941  * command says that media is available.
1942  *
1943  * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing,
1944  * since media removal events are supposed to be sent to the driver through
1945  * a link status event.
1946  */
1947 static void
1948 ice_poll_for_media_avail(struct ice_softc *sc)
1949 {
1950 	struct ice_hw *hw = &sc->hw;
1951 	struct ice_port_info *pi = hw->port_info;
1952 
1953 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
1954 		pi->phy.get_link_info = true;
1955 		ice_get_link_status(pi, &sc->link_up);
1956 
1957 		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
1958 			enum ice_status status;
1959 
1960 			/* Re-enable link and re-apply user link settings */
1961 			ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
1962 
1963 			/* Update the OS about changes in media capability */
1964 			status = ice_add_media_types(sc, sc->media);
1965 			if (status)
1966 				device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
1967 					      ice_status_str(status),
1968 					      ice_aq_str(hw->adminq.sq_last_status));
1969 
1970 			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
1971 		}
1972 	}
1973 }
1974 
1975 /**
1976  * ice_if_timer - called by iflib periodically
1977  * @ctx: iflib ctx structure
1978  * @qid: the queue this timer was called for
1979  *
1980  * This callback is triggered by iflib periodically. We use it to update the
1981  * hw statistics.
1982  *
1983  * @remark this function is not protected by the iflib CTX lock.
1984  */
1985 static void
1986 ice_if_timer(if_ctx_t ctx, uint16_t qid)
1987 {
1988 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1989 	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;
1990 
1991 	if (qid != 0)
1992 		return;
1993 
1994 	/* Do not attempt to update stats when in recovery mode */
1995 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1996 		return;
1997 
1998 	/* Update device statistics */
1999 	ice_update_pf_stats(sc);
2000 
2001 	/*
2002 	 * For proper watchdog management, the iflib stack needs to know if
2003 	 * we've been paused during the last interval. Check if the
2004 	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
2005 	 */
2006 	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
2007 		sc->scctx->isc_pause_frames = 1;
2008 
2009 	/* Update the primary VSI stats */
2010 	ice_update_vsi_hw_stats(&sc->pf_vsi);
2011 }
2012 
2013 /**
2014  * ice_admin_timer - called periodically to trigger the admin task
2015  * @arg: callout(9) argument pointing to the device private softc structure
2016  *
2017  * Timer function used as part of a callout(9) timer that will periodically
2018  * trigger the admin task, even when the interface is down.
2019  *
2020  * @remark this function is not called by iflib and is not protected by the
2021  * iflib CTX lock.
2022  *
2023  * @remark because this is a callout function, it cannot sleep and should not
2024  * attempt taking the iflib CTX lock.
2025  */
2026 static void
2027 ice_admin_timer(void *arg)
2028 {
2029 	struct ice_softc *sc = (struct ice_softc *)arg;
2030 
2031 	/*
2032 	 * There is a point where callout routines are no longer
2033 	 * cancelable.  So there exists a window of time where the
2034 	 * driver enters detach() and tries to cancel the callout, but the
2035 	 * callout routine has passed the cancellation point.  The detach()
2036 	 * routine is unaware of this and tries to free resources that the
2037 	 * callout routine needs.  So we check for the detach state flag to
2038 	 * at least shrink the window of opportunity.
2039 	 */
2040 	if (ice_driver_is_detaching(sc))
2041 		return;
2042 
2043 	/* Fire off the admin task */
2044 	iflib_admin_intr_deferred(sc->ctx);
2045 
2046 	/* Reschedule the admin timer */
2047 	callout_schedule(&sc->admin_timer, hz/2);
2048 }
2049 
2050 /**
2051  * ice_transition_recovery_mode - Transition to recovery mode
2052  * @sc: the device private softc
2053  *
2054  * Called when the driver detects that the firmware has entered recovery mode
2055  * at run time.
2056  */
2057 static void
2058 ice_transition_recovery_mode(struct ice_softc *sc)
2059 {
2060 	struct ice_vsi *vsi = &sc->pf_vsi;
2061 	int i;
2062 
2063 	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
2064 
2065 	/* Tell the stack that the link has gone down */
2066 	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
2067 
2068 	/* Request that the device be re-initialized */
2069 	ice_request_stack_reinit(sc);
2070 
2071 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2072 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2073 
2074 	ice_vsi_del_txqs_ctx(vsi);
2075 	ice_vsi_del_rxqs_ctx(vsi);
2076 
2077 	for (i = 0; i < sc->num_available_vsi; i++) {
2078 		if (sc->all_vsi[i])
2079 			ice_release_vsi(sc->all_vsi[i]);
2080 	}
2081 	sc->num_available_vsi = 0;
2082 
2083 	if (sc->all_vsi) {
2084 		free(sc->all_vsi, M_ICE);
2085 		sc->all_vsi = NULL;
2086 	}
2087 
2088 	/* Destroy the interrupt manager */
2089 	ice_resmgr_destroy(&sc->imgr);
2090 	/* Destroy the queue managers */
2091 	ice_resmgr_destroy(&sc->tx_qmgr);
2092 	ice_resmgr_destroy(&sc->rx_qmgr);
2093 
2094 	ice_deinit_hw(&sc->hw);
2095 }
2096 
2097 /**
2098  * ice_transition_safe_mode - Transition to safe mode
2099  * @sc: the device private softc
2100  *
2101  * Called when the driver attempts to reload the DDP package during a device
2102  * reset, and the new download fails. If so, we must transition to safe mode
2103  * at run time.
2104  *
2105  * @remark although safe mode normally allocates only a single queue, we can't
2106  * change the number of queues dynamically when using iflib. Due to this, we
2107  * do not attempt to reduce the number of queues.
2108  */
2109 static void
2110 ice_transition_safe_mode(struct ice_softc *sc)
2111 {
2112 	/* Indicate that we are in Safe mode */
2113 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
2114 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
2115 
2116 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2117 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2118 
2119 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2120 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
2121 }
2122 
2123 /**
2124  * ice_if_update_admin_status - update admin status
2125  * @ctx: iflib ctx structure
2126  *
2127  * Called by iflib to update the admin status. For our purposes, this means
2128  * check the adminq, and update the link status. It's ultimately triggered by
2129  * our admin interrupt, or by the ice_if_timer periodically.
2130  *
2131  * @pre assumes the caller holds the iflib CTX lock
2132  */
2133 static void
2134 ice_if_update_admin_status(if_ctx_t ctx)
2135 {
2136 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2137 	enum ice_fw_modes fw_mode;
2138 	bool reschedule = false;
2139 	u16 pending = 0;
2140 
2141 	ASSERT_CTX_LOCKED(sc);
2142 
2143 	/* Check if the firmware entered recovery mode at run time */
2144 	fw_mode = ice_get_fw_mode(&sc->hw);
2145 	if (fw_mode == ICE_FW_MODE_REC) {
2146 		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2147 			/* If we just entered recovery mode, log a warning to
2148 			 * the system administrator and deinit driver state
2149 			 * that is no longer functional.
2150 			 */
2151 			ice_transition_recovery_mode(sc);
2152 		}
2153 	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
2154 		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
2155 			/* Rollback mode isn't fatal, but we don't want to
2156 			 * repeatedly post a message about it.
2157 			 */
2158 			ice_print_rollback_msg(&sc->hw);
2159 		}
2160 	}
2161 
2162 	/* Handle global reset events */
2163 	ice_handle_reset_event(sc);
2164 
2165 	/* Handle PF reset requests */
2166 	ice_handle_pf_reset_request(sc);
2167 
2168 	/* Handle MDD events */
2169 	ice_handle_mdd_event(sc);
2170 
2171 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
2172 	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
2173 	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2174 		/*
2175 		 * If we know the control queues are disabled, skip processing
2176 		 * the control queues entirely.
2177 		 */
2178 		;
2179 	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
2180 		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
2181 		if (pending > 0)
2182 			reschedule = true;
2183 
2184 		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
2185 		if (pending > 0)
2186 			reschedule = true;
2187 	}
2188 
2189 	/* Poll for link up */
2190 	ice_poll_for_media_avail(sc);
2191 
2192 	/* Check and update link status */
2193 	ice_update_link_status(sc, false);
2194 
2195 	/*
2196 	 * If there are still messages to process, we need to reschedule
2197 	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
2198 	 * woken up at the next interrupt or timer event.
2199 	 */
2200 	if (reschedule) {
2201 		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
2202 		iflib_admin_intr_deferred(ctx);
2203 	} else {
2204 		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2205 	}
2206 }
2207 
2208 /**
2209  * ice_prepare_for_reset - Prepare device for an impending reset
2210  * @sc: The device private softc
2211  *
2212  * Prepare the driver for an impending reset, shutting down VSIs, clearing the
2213  * scheduler setup, and shutting down controlqs. Uses the
2214  * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
2215  * driver for reset or not.
2216  */
2217 static void
2218 ice_prepare_for_reset(struct ice_softc *sc)
2219 {
2220 	struct ice_hw *hw = &sc->hw;
2221 
2222 	/* If we're already prepared, there's nothing to do */
2223 	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
2224 		return;
2225 
2226 	log(LOG_INFO, "%s: preparing to reset device logic\n", sc->ifp->if_xname);
2227 
2228 	/* In recovery mode, hardware is not initialized */
2229 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2230 		return;
2231 
2232 	/* Release the main PF VSI queue mappings */
2233 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2234 				    sc->pf_vsi.num_tx_queues);
2235 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2236 				    sc->pf_vsi.num_rx_queues);
2237 
2238 	ice_clear_hw_tbls(hw);
2239 
2240 	if (hw->port_info)
2241 		ice_sched_clear_port(hw->port_info);
2242 
2243 	ice_shutdown_all_ctrlq(hw);
2244 }
2245 
2246 /**
2247  * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
2248  * @sc: the device softc pointer
2249  *
2250  * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
2251  * mapping after a reset occurred.
2252  */
2253 static int
2254 ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
2255 {
2256 	struct ice_vsi *vsi = &sc->pf_vsi;
2257 	struct ice_tx_queue *txq;
2258 	struct ice_rx_queue *rxq;
2259 	int err, i;
2260 
2261 	/* Re-assign Tx queues from PF space to the main VSI */
2262 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
2263 					    vsi->num_tx_queues);
2264 	if (err) {
2265 		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
2266 			      ice_err_str(err));
2267 		return (err);
2268 	}
2269 
2270 	/* Re-assign Rx queues from PF space to this VSI */
2271 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
2272 					    vsi->num_rx_queues);
2273 	if (err) {
2274 		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
2275 			      ice_err_str(err));
2276 		goto err_release_tx_queues;
2277 	}
2278 
2279 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
2280 
2281 	/* Re-assign Tx queue tail pointers */
2282 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
2283 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
2284 
2285 	/* Re-assign Rx queue tail pointers */
2286 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
2287 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
2288 
2289 	return (0);
2290 
2291 err_release_tx_queues:
2292 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2293 				   sc->pf_vsi.num_tx_queues);
2294 
2295 	return (err);
2296 }
2297 
/*
 * Determine if the iflib context is active. Evaluates to a non-zero value
 * when IFF_DRV_RUNNING is set in the driver flags of the context's ifnet,
 * and to zero otherwise.
 */
#define CTX_ACTIVE(ctx) \
	(if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
2300 
2301 /**
2302  * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
2303  * @sc: The device private softc
2304  *
2305  * Handle a driver rebuild while in recovery mode. This will only rebuild the
2306  * limited functionality supported while in recovery mode.
2307  */
2308 static void
2309 ice_rebuild_recovery_mode(struct ice_softc *sc)
2310 {
2311 	device_t dev = sc->dev;
2312 
2313 	/* enable PCIe bus master */
2314 	pci_enable_busmaster(dev);
2315 
2316 	/* Configure interrupt causes for the administrative interrupt */
2317 	ice_configure_misc_interrupts(sc);
2318 
2319 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2320 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2321 
2322 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2323 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2324 
2325 	log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname);
2326 
2327 	/* In order to completely restore device functionality, the iflib core
2328 	 * needs to be reset. We need to request an iflib reset. Additionally,
2329 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2330 	 * the iflib core, we also want re-run the admin task so that iflib
2331 	 * resets immediately instead of waiting for the next interrupt.
2332 	 */
2333 	ice_request_stack_reinit(sc);
2334 
2335 	return;
2336 }
2337 
2338 /**
2339  * ice_rebuild - Rebuild driver state post reset
2340  * @sc: The device private softc
2341  *
2342  * Restore driver state after a reset occurred. Restart the controlqs, setup
2343  * the hardware port, and re-enable the VSIs.
2344  */
2345 static void
2346 ice_rebuild(struct ice_softc *sc)
2347 {
2348 	struct ice_hw *hw = &sc->hw;
2349 	device_t dev = sc->dev;
2350 	enum ice_status status;
2351 	int err;
2352 
2353 	sc->rebuild_ticks = ticks;
2354 
2355 	/* If we're rebuilding, then a reset has succeeded. */
2356 	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);
2357 
2358 	/*
2359 	 * If the firmware is in recovery mode, only restore the limited
2360 	 * functionality supported by recovery mode.
2361 	 */
2362 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2363 		ice_rebuild_recovery_mode(sc);
2364 		return;
2365 	}
2366 
2367 	/* enable PCIe bus master */
2368 	pci_enable_busmaster(dev);
2369 
2370 	status = ice_init_all_ctrlq(hw);
2371 	if (status) {
2372 		device_printf(dev, "failed to re-init controlqs, err %s\n",
2373 			      ice_status_str(status));
2374 		goto err_shutdown_ctrlq;
2375 	}
2376 
2377 	/* Query the allocated resources for Tx scheduler */
2378 	status = ice_sched_query_res_alloc(hw);
2379 	if (status) {
2380 		device_printf(dev,
2381 			      "Failed to query scheduler resources, err %s aq_err %s\n",
2382 			      ice_status_str(status),
2383 			      ice_aq_str(hw->adminq.sq_last_status));
2384 		goto err_shutdown_ctrlq;
2385 	}
2386 
2387 	/* Re-enable FW logging. Keep going even if this fails */
2388 	status = ice_fwlog_set(hw, &hw->fwlog_cfg);
2389 	if (!status) {
2390 		/*
2391 		 * We should have the most updated cached copy of the
2392 		 * configuration, regardless of whether we're rebuilding
2393 		 * or not.  So we'll simply check to see if logging was
2394 		 * enabled pre-rebuild.
2395 		 */
2396 		if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2397 			status = ice_fwlog_register(hw);
2398 			if (status)
2399 				device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n",
2400 				   ice_status_str(status),
2401 				   ice_aq_str(hw->adminq.sq_last_status));
2402 		}
2403 	} else
2404 		device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n",
2405 		   ice_status_str(status),
2406 		   ice_aq_str(hw->adminq.sq_last_status));
2407 
2408 	err = ice_send_version(sc);
2409 	if (err)
2410 		goto err_shutdown_ctrlq;
2411 
2412 	err = ice_init_link_events(sc);
2413 	if (err) {
2414 		device_printf(dev, "ice_init_link_events failed: %s\n",
2415 			      ice_err_str(err));
2416 		goto err_shutdown_ctrlq;
2417 	}
2418 
2419 	status = ice_clear_pf_cfg(hw);
2420 	if (status) {
2421 		device_printf(dev, "failed to clear PF configuration, err %s\n",
2422 			      ice_status_str(status));
2423 		goto err_shutdown_ctrlq;
2424 	}
2425 
2426 	ice_clear_pxe_mode(hw);
2427 
2428 	status = ice_get_caps(hw);
2429 	if (status) {
2430 		device_printf(dev, "failed to get capabilities, err %s\n",
2431 			      ice_status_str(status));
2432 		goto err_shutdown_ctrlq;
2433 	}
2434 
2435 	status = ice_sched_init_port(hw->port_info);
2436 	if (status) {
2437 		device_printf(dev, "failed to initialize port, err %s\n",
2438 			      ice_status_str(status));
2439 		goto err_sched_cleanup;
2440 	}
2441 
2442 	/* If we previously loaded the package, it needs to be reloaded now */
2443 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
2444 		status = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
2445 		if (status) {
2446 			ice_log_pkg_init(sc, &status);
2447 
2448 			ice_transition_safe_mode(sc);
2449 		}
2450 	}
2451 
2452 	ice_reset_pf_stats(sc);
2453 
2454 	err = ice_rebuild_pf_vsi_qmap(sc);
2455 	if (err) {
2456 		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
2457 			      ice_err_str(err));
2458 		goto err_sched_cleanup;
2459 	}
2460 	err = ice_initialize_vsi(&sc->pf_vsi);
2461 	if (err) {
2462 		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
2463 			      ice_err_str(err));
2464 		goto err_release_queue_allocations;
2465 	}
2466 
2467 	/* Replay all VSI configuration */
2468 	err = ice_replay_all_vsi_cfg(sc);
2469 	if (err)
2470 		goto err_deinit_pf_vsi;
2471 
2472 	/* Re-enable FW health event reporting */
2473 	ice_init_health_events(sc);
2474 
2475 	/* Reconfigure the main PF VSI for RSS */
2476 	err = ice_config_rss(&sc->pf_vsi);
2477 	if (err) {
2478 		device_printf(sc->dev,
2479 			      "Unable to reconfigure RSS for the main VSI, err %s\n",
2480 			      ice_err_str(err));
2481 		goto err_deinit_pf_vsi;
2482 	}
2483 
2484 	/* Refresh link status */
2485 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2486 	sc->hw.port_info->phy.get_link_info = true;
2487 	ice_get_link_status(sc->hw.port_info, &sc->link_up);
2488 	ice_update_link_status(sc, true);
2489 
2490 	/* Configure interrupt causes for the administrative interrupt */
2491 	ice_configure_misc_interrupts(sc);
2492 
2493 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2494 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2495 
2496 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2497 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2498 
2499 	log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname);
2500 
2501 	/* In order to completely restore device functionality, the iflib core
2502 	 * needs to be reset. We need to request an iflib reset. Additionally,
2503 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2504 	 * the iflib core, we also want re-run the admin task so that iflib
2505 	 * resets immediately instead of waiting for the next interrupt.
2506 	 */
2507 	ice_request_stack_reinit(sc);
2508 
2509 	return;
2510 
2511 err_deinit_pf_vsi:
2512 	ice_deinit_vsi(&sc->pf_vsi);
2513 err_release_queue_allocations:
2514 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2515 				    sc->pf_vsi.num_tx_queues);
2516 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2517 				    sc->pf_vsi.num_rx_queues);
2518 err_sched_cleanup:
2519 	ice_sched_cleanup_all(hw);
2520 err_shutdown_ctrlq:
2521 	ice_shutdown_all_ctrlq(hw);
2522 	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2523 	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
2524 }
2525 
2526 /**
2527  * ice_handle_reset_event - Handle reset events triggered by OICR
2528  * @sc: The device private softc
2529  *
2530  * Handle reset events triggered by an OICR notification. This includes CORER,
2531  * GLOBR, and EMPR resets triggered by software on this or any other PF or by
2532  * firmware.
2533  *
2534  * @pre assumes the iflib context lock is held, and will unlock it while
2535  * waiting for the hardware to finish reset.
2536  */
2537 static void
2538 ice_handle_reset_event(struct ice_softc *sc)
2539 {
2540 	struct ice_hw *hw = &sc->hw;
2541 	enum ice_status status;
2542 	device_t dev = sc->dev;
2543 
2544 	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
2545 	 * trigger an OICR interrupt. Our OICR handler will determine when
2546 	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
2547 	 * appropriate.
2548 	 */
2549 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
2550 		return;
2551 
2552 	ice_prepare_for_reset(sc);
2553 
2554 	/*
2555 	 * Release the iflib context lock and wait for the device to finish
2556 	 * resetting.
2557 	 */
2558 	IFLIB_CTX_UNLOCK(sc);
2559 	status = ice_check_reset(hw);
2560 	IFLIB_CTX_LOCK(sc);
2561 	if (status) {
2562 		device_printf(dev, "Device never came out of reset, err %s\n",
2563 			      ice_status_str(status));
2564 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2565 		return;
2566 	}
2567 
2568 	/* We're done with the reset, so we can rebuild driver state */
2569 	sc->hw.reset_ongoing = false;
2570 	ice_rebuild(sc);
2571 
2572 	/* In the unlikely event that a PF reset request occurs at the same
2573 	 * time as a global reset, clear the request now. This avoids
2574 	 * resetting a second time right after we reset due to a global event.
2575 	 */
2576 	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2577 		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
2578 }
2579 
2580 /**
2581  * ice_handle_pf_reset_request - Initiate PF reset requested by software
2582  * @sc: The device private softc
2583  *
2584  * Initiate a PF reset requested by software. We handle this in the admin task
2585  * so that only one thread actually handles driver preparation and cleanup,
2586  * rather than having multiple threads possibly attempt to run this code
2587  * simultaneously.
2588  *
2589  * @pre assumes the iflib context lock is held and will unlock it while
2590  * waiting for the PF reset to complete.
2591  */
2592 static void
2593 ice_handle_pf_reset_request(struct ice_softc *sc)
2594 {
2595 	struct ice_hw *hw = &sc->hw;
2596 	enum ice_status status;
2597 
2598 	/* Check for PF reset requests */
2599 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2600 		return;
2601 
2602 	/* Make sure we're prepared for reset */
2603 	ice_prepare_for_reset(sc);
2604 
2605 	/*
2606 	 * Release the iflib context lock and wait for the device to finish
2607 	 * resetting.
2608 	 */
2609 	IFLIB_CTX_UNLOCK(sc);
2610 	status = ice_reset(hw, ICE_RESET_PFR);
2611 	IFLIB_CTX_LOCK(sc);
2612 	if (status) {
2613 		device_printf(sc->dev, "device PF reset failed, err %s\n",
2614 			      ice_status_str(status));
2615 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2616 		return;
2617 	}
2618 
2619 	sc->soft_stats.pfr_count++;
2620 	ice_rebuild(sc);
2621 }
2622 
2623 /**
2624  * ice_init_device_features - Init device driver features
2625  * @sc: driver softc structure
2626  *
2627  * @pre assumes that the function capabilities bits have been set up by
2628  * ice_init_hw().
2629  */
2630 static void
2631 ice_init_device_features(struct ice_softc *sc)
2632 {
2633 	/*
2634 	 * A failed pkg file download triggers safe mode, disabling advanced
2635 	 * device feature support
2636 	 */
2637 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE))
2638 		return;
2639 
2640 	/* Set capabilities that all devices support */
2641 	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2642 	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
2643 	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
2644 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
2645 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
2646 	ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2647 	ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2648 	ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap);
2649 
2650 	/* Disable features due to hardware limitations... */
2651 	if (!sc->hw.func_caps.common_cap.rss_table_size)
2652 		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2653 	/* Disable features due to firmware limitations... */
2654 	if (!ice_is_fw_health_report_supported(&sc->hw))
2655 		ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2656 	if (!ice_fwlog_supported(&sc->hw))
2657 		ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2658 	if (sc->hw.fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2659 		if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING))
2660 			ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en);
2661 		else
2662 			ice_fwlog_unregister(&sc->hw);
2663 	}
2664 
2665 	/* Disable capabilities not supported by the OS */
2666 	ice_disable_unsupported_features(sc->feat_cap);
2667 
2668 	/* RSS is always enabled for iflib */
2669 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
2670 		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
2671 }
2672 
2673 /**
2674  * ice_if_multi_set - Callback to update Multicast filters in HW
2675  * @ctx: iflib ctx structure
2676  *
2677  * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
2678  * the if_multiaddrs list and determine which filters have been added or
2679  * removed from the list, and update HW programming to reflect the new list.
2680  *
2681  * @pre assumes the caller holds the iflib CTX lock
2682  */
2683 static void
2684 ice_if_multi_set(if_ctx_t ctx)
2685 {
2686 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2687 	int err;
2688 
2689 	ASSERT_CTX_LOCKED(sc);
2690 
2691 	/* Do not handle multicast configuration in recovery mode */
2692 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2693 		return;
2694 
2695 	err = ice_sync_multicast_filters(sc);
2696 	if (err) {
2697 		device_printf(sc->dev,
2698 			      "Failed to synchronize multicast filter list: %s\n",
2699 			      ice_err_str(err));
2700 		return;
2701 	}
2702 }
2703 
2704 /**
2705  * ice_if_vlan_register - Register a VLAN with the hardware
2706  * @ctx: iflib ctx pointer
2707  * @vtag: VLAN to add
2708  *
2709  * Programs the main PF VSI with a hardware filter for the given VLAN.
2710  *
2711  * @pre assumes the caller holds the iflib CTX lock
2712  */
2713 static void
2714 ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
2715 {
2716 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2717 	enum ice_status status;
2718 
2719 	ASSERT_CTX_LOCKED(sc);
2720 
2721 	/* Do not handle VLAN configuration in recovery mode */
2722 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2723 		return;
2724 
2725 	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
2726 	if (status) {
2727 		device_printf(sc->dev,
2728 			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
2729 			      vtag, ice_status_str(status),
2730 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2731 	}
2732 }
2733 
2734 /**
2735  * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
2736  * @ctx: iflib ctx pointer
2737  * @vtag: VLAN to add
2738  *
2739  * Removes the previously programmed VLAN filter from the main PF VSI.
2740  *
2741  * @pre assumes the caller holds the iflib CTX lock
2742  */
2743 static void
2744 ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
2745 {
2746 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2747 	enum ice_status status;
2748 
2749 	ASSERT_CTX_LOCKED(sc);
2750 
2751 	/* Do not handle VLAN configuration in recovery mode */
2752 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2753 		return;
2754 
2755 	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
2756 	if (status) {
2757 		device_printf(sc->dev,
2758 			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
2759 			      vtag, ice_status_str(status),
2760 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2761 	}
2762 }
2763 
2764 /**
2765  * ice_if_stop - Stop the device
2766  * @ctx: iflib context structure
2767  *
2768  * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
2769  * down)
2770  *
2771  * @pre assumes the caller holds the iflib CTX lock
2772  */
2773 static void
2774 ice_if_stop(if_ctx_t ctx)
2775 {
2776 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2777 
2778 	ASSERT_CTX_LOCKED(sc);
2779 
2780 	/*
2781 	 * The iflib core may call IFDI_STOP prior to the first call to
2782 	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
2783 	 * don't have, and disable Tx queues which aren't yet configured.
2784 	 * Although it is likely these extra operations are harmless, they do
2785 	 * cause spurious warning messages to be displayed, which may confuse
2786 	 * users.
2787 	 *
2788 	 * To avoid these messages, we use a state bit indicating if we've
2789 	 * been initialized. It will be set when ice_if_init is called, and
2790 	 * cleared here in ice_if_stop.
2791 	 */
2792 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
2793 		return;
2794 
2795 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
2796 		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
2797 		return;
2798 	}
2799 
2800 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
2801 		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
2802 		return;
2803 	}
2804 
2805 	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
2806 	 * return of these functions because there's nothing we can really do
2807 	 * if they fail, and the functions already print error messages.
2808 	 * Just try to shut down as much as we can.
2809 	 */
2810 	ice_rm_pf_default_mac_filters(sc);
2811 
2812 	/* Dissociate the Tx and Rx queues from the interrupts */
2813 	ice_flush_txq_interrupts(&sc->pf_vsi);
2814 	ice_flush_rxq_interrupts(&sc->pf_vsi);
2815 
2816 	/* Disable the Tx and Rx queues */
2817 	ice_vsi_disable_tx(&sc->pf_vsi);
2818 	ice_control_rx_queues(&sc->pf_vsi, false);
2819 }
2820 
2821 /**
2822  * ice_if_get_counter - Get current value of an ifnet statistic
2823  * @ctx: iflib context pointer
2824  * @counter: ifnet counter to read
2825  *
2826  * Reads the current value of an ifnet counter for the device.
2827  *
2828  * This function is not protected by the iflib CTX lock.
2829  */
2830 static uint64_t
2831 ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
2832 {
2833 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2834 
2835 	/* Return the counter for the main PF VSI */
2836 	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
2837 }
2838 
2839 /**
2840  * ice_request_stack_reinit - Request that iflib re-initialize
2841  * @sc: the device private softc
2842  *
2843  * Request that the device be brought down and up, to re-initialize. For
2844  * example, this may be called when a device reset occurs, or when Tx and Rx
2845  * queues need to be re-initialized.
2846  *
2847  * This is required because the iflib state is outside the driver, and must be
2848  * re-initialized if we need to resart Tx and Rx queues.
2849  */
2850 void
2851 ice_request_stack_reinit(struct ice_softc *sc)
2852 {
2853 	if (CTX_ACTIVE(sc->ctx)) {
2854 		iflib_request_reset(sc->ctx);
2855 		iflib_admin_intr_deferred(sc->ctx);
2856 	}
2857 }
2858 
2859 /**
2860  * ice_driver_is_detaching - Check if the driver is detaching/unloading
2861  * @sc: device private softc
2862  *
2863  * Returns true if the driver is detaching, false otherwise.
2864  *
2865  * @remark on newer kernels, take advantage of iflib_in_detach in order to
2866  * report detachment correctly as early as possible.
2867  *
2868  * @remark this function is used by various code paths that want to avoid
2869  * running if the driver is about to be removed. This includes sysctls and
2870  * other driver access points. Note that it does not fully resolve
2871  * detach-based race conditions as it is possible for a thread to race with
2872  * iflib_in_detach.
2873  */
2874 bool
2875 ice_driver_is_detaching(struct ice_softc *sc)
2876 {
2877 	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
2878 		iflib_in_detach(sc->ctx));
2879 }
2880 
2881 /**
2882  * ice_if_priv_ioctl - Device private ioctl handler
2883  * @ctx: iflib context pointer
2884  * @command: The ioctl command issued
2885  * @data: ioctl specific data
2886  *
2887  * iflib callback for handling custom driver specific ioctls.
2888  *
2889  * @pre Assumes that the iflib context lock is held.
2890  */
2891 static int
2892 ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
2893 {
2894 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2895 	struct ifdrv *ifd;
2896 	device_t dev = sc->dev;
2897 
2898 	if (data == NULL)
2899 		return (EINVAL);
2900 
2901 	ASSERT_CTX_LOCKED(sc);
2902 
2903 	/* Make sure the command type is valid */
2904 	switch (command) {
2905 	case SIOCSDRVSPEC:
2906 	case SIOCGDRVSPEC:
2907 		/* Accepted commands */
2908 		break;
2909 	case SIOCGPRIVATE_0:
2910 		/*
2911 		 * Although we do not support this ioctl command, it's
2912 		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
2913 		 * handler. Do not print a message in this case
2914 		 */
2915 		return (ENOTSUP);
2916 	default:
2917 		/*
2918 		 * If we get a different command for this function, it's
2919 		 * definitely unexpected, so log a message indicating what
2920 		 * command we got for debugging purposes.
2921 		 */
2922 		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
2923 			      __func__, command);
2924 		return (EINVAL);
2925 	}
2926 
2927 	ifd = (struct ifdrv *)data;
2928 
2929 	switch (ifd->ifd_cmd) {
2930 	case ICE_NVM_ACCESS:
2931 		return ice_handle_nvm_access_ioctl(sc, ifd);
2932 	default:
2933 		return EINVAL;
2934 	}
2935 }
2936 
2937 /**
2938  * ice_if_i2c_req - I2C request handler for iflib
2939  * @ctx: iflib context pointer
2940  * @req: The I2C parameters to use
2941  *
2942  * Read from the port's I2C eeprom using the parameters from the ioctl.
2943  *
2944  * @remark The iflib-only part is pretty simple.
2945  */
2946 static int
2947 ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
2948 {
2949 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2950 
2951 	return ice_handle_i2c_req(sc, req);
2952 }
2953 
2954 /**
2955  * ice_if_suspend - PCI device suspend handler for iflib
2956  * @ctx: iflib context pointer
2957  *
2958  * Deinitializes the driver and clears HW resources in preparation for
2959  * suspend or an FLR.
2960  *
2961  * @returns 0; this return value is ignored
2962  */
2963 static int
2964 ice_if_suspend(if_ctx_t ctx)
2965 {
2966 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2967 
2968 	/* At least a PFR is always going to happen after this;
2969 	 * either via FLR or during the D3->D0 transition.
2970 	 */
2971 	ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
2972 
2973 	ice_prepare_for_reset(sc);
2974 
2975 	return (0);
2976 }
2977 
2978 /**
2979  * ice_if_resume - PCI device resume handler for iflib
2980  * @ctx: iflib context pointer
2981  *
2982  * Reinitializes the driver and the HW after PCI resume or after
2983  * an FLR. An init is performed by iflib after this function is finished.
2984  *
2985  * @returns 0; this return value is ignored
2986  */
2987 static int
2988 ice_if_resume(if_ctx_t ctx)
2989 {
2990 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2991 
2992 	ice_rebuild(sc);
2993 
2994 	return (0);
2995 }
2996 
2997