xref: /freebsd/sys/dev/ice/if_ice_iflib.c (revision 2a63c3be158216222d89a073dcbd6a72ee4aab5a)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2023, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file if_ice_iflib.c
34  * @brief iflib driver implementation
35  *
36  * Contains the main entry point for the iflib driver implementation. It
37  * implements the various ifdi driver methods, and sets up the module and
38  * driver values to load an iflib driver.
39  */
40 
41 #include "ice_iflib.h"
42 #include "ice_drv_info.h"
43 #include "ice_switch.h"
44 #include "ice_sched.h"
45 
46 #include <sys/module.h>
47 #include <sys/sockio.h>
48 #include <sys/smp.h>
49 #include <dev/pci/pcivar.h>
50 #include <dev/pci/pcireg.h>
51 
52 /*
53  * Device method prototypes
54  */
55 
56 static void *ice_register(device_t);
57 static int  ice_if_attach_pre(if_ctx_t);
58 static int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
59 static int  ice_if_attach_post(if_ctx_t);
60 static void ice_attach_post_recovery_mode(struct ice_softc *sc);
61 static int  ice_if_detach(if_ctx_t);
62 static int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
63 static int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets);
64 static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
65 static void ice_if_queues_free(if_ctx_t ctx);
66 static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
67 static void ice_if_intr_enable(if_ctx_t ctx);
68 static void ice_if_intr_disable(if_ctx_t ctx);
69 static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
70 static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
71 static int ice_if_promisc_set(if_ctx_t ctx, int flags);
72 static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
73 static int ice_if_media_change(if_ctx_t ctx);
74 static void ice_if_init(if_ctx_t ctx);
75 static void ice_if_timer(if_ctx_t ctx, uint16_t qid);
76 static void ice_if_update_admin_status(if_ctx_t ctx);
77 static void ice_if_multi_set(if_ctx_t ctx);
78 static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
79 static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
80 static void ice_if_stop(if_ctx_t ctx);
81 static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
82 static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
83 static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
84 static int ice_if_suspend(if_ctx_t ctx);
85 static int ice_if_resume(if_ctx_t ctx);
86 
87 static int ice_msix_que(void *arg);
88 static int ice_msix_admin(void *arg);
89 
90 /*
91  * Helper function prototypes
92  */
93 static int ice_pci_mapping(struct ice_softc *sc);
94 static void ice_free_pci_mapping(struct ice_softc *sc);
95 static void ice_update_link_status(struct ice_softc *sc, bool update_media);
96 static void ice_init_device_features(struct ice_softc *sc);
97 static void ice_init_tx_tracking(struct ice_vsi *vsi);
98 static void ice_handle_reset_event(struct ice_softc *sc);
99 static void ice_handle_pf_reset_request(struct ice_softc *sc);
100 static void ice_prepare_for_reset(struct ice_softc *sc);
101 static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
102 static void ice_rebuild(struct ice_softc *sc);
103 static void ice_rebuild_recovery_mode(struct ice_softc *sc);
104 static void ice_free_irqvs(struct ice_softc *sc);
105 static void ice_update_rx_mbuf_sz(struct ice_softc *sc);
106 static void ice_poll_for_media_avail(struct ice_softc *sc);
107 static void ice_setup_scctx(struct ice_softc *sc);
108 static int ice_allocate_msix(struct ice_softc *sc);
109 static void ice_admin_timer(void *arg);
110 static void ice_transition_recovery_mode(struct ice_softc *sc);
111 static void ice_transition_safe_mode(struct ice_softc *sc);
112 
113 /*
114  * Device Interface Declaration
115  */
116 
117 /**
118  * @var ice_methods
119  * @brief ice driver method entry points
120  *
121  * List of device methods implementing the generic device interface used by
122  * the device stack to interact with the ice driver. Since this is an iflib
123  * driver, most of the methods point to the generic iflib implementation.
124  */
125 static device_method_t ice_methods[] = {
126 	/* Device interface */
127 	DEVMETHOD(device_register, ice_register),
128 	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
129 	DEVMETHOD(device_attach,   iflib_device_attach),
130 	DEVMETHOD(device_detach,   iflib_device_detach),
131 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
132 	DEVMETHOD(device_suspend,  iflib_device_suspend),
133 	DEVMETHOD(device_resume,   iflib_device_resume),
134 	DEVMETHOD_END
135 };
136 
137 /**
138  * @var ice_iflib_methods
139  * @brief iflib method entry points
140  *
141  * List of device methods used by the iflib stack to interact with this
142  * driver. These are the real main entry points used to interact with this
143  * driver.
144  */
145 static device_method_t ice_iflib_methods[] = {
146 	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
147 	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
148 	DEVMETHOD(ifdi_detach, ice_if_detach),
149 	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
150 	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
151 	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
152 	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
153 	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
154 	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
155 	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
156 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
157 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
158 	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
159 	DEVMETHOD(ifdi_media_status, ice_if_media_status),
160 	DEVMETHOD(ifdi_media_change, ice_if_media_change),
161 	DEVMETHOD(ifdi_init, ice_if_init),
162 	DEVMETHOD(ifdi_stop, ice_if_stop),
163 	DEVMETHOD(ifdi_timer, ice_if_timer),
164 	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
165 	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
166 	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
167 	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
168 	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
169 	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
170 	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
171 	DEVMETHOD(ifdi_suspend, ice_if_suspend),
172 	DEVMETHOD(ifdi_resume, ice_if_resume),
173 	DEVMETHOD_END
174 };
175 
176 /**
177  * @var ice_driver
178  * @brief driver structure for the generic device stack
179  *
180  * driver_t definition used to setup the generic device methods.
181  */
182 static driver_t ice_driver = {
183 	.name = "ice",
184 	.methods = ice_methods,
185 	.size = sizeof(struct ice_softc),
186 };
187 
188 /**
189  * @var ice_iflib_driver
190  * @brief driver structure for the iflib stack
191  *
192  * driver_t definition used to setup the iflib device methods.
193  */
194 static driver_t ice_iflib_driver = {
195 	.name = "ice",
196 	.methods = ice_iflib_methods,
197 	.size = sizeof(struct ice_softc),
198 };
199 
200 extern struct if_txrx ice_txrx;
201 extern struct if_txrx ice_recovery_txrx;
202 
203 /**
204  * @var ice_sctx
205  * @brief ice driver shared context
206  *
207  * Structure defining shared values (context) that is used by all instances of
208  * the device. Primarily used to setup details about how the iflib stack
209  * should treat this driver. Also defines the default, minimum, and maximum
210  * number of descriptors in each ring.
211  */
212 static struct if_shared_ctx ice_sctx = {
213 	.isc_magic = IFLIB_MAGIC,
214 	.isc_q_align = PAGE_SIZE,
215 
216 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
217 	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
218 	 * that doesn't make sense since that would be larger than the maximum
219 	 * size of a single packet.
220 	 */
221 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
222 
223 	/* XXX: This is only used by iflib to ensure that
224 	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
225 	 */
226 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
227 	/* XXX: This is used by iflib to set the number of segments in the TSO
228 	 * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the
229 	 * related ifnet parameter.
230 	 */
231 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
232 
233 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
234 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
235 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
236 
237 	.isc_nfl = 1,
238 	.isc_ntxqs = 1,
239 	.isc_nrxqs = 1,
240 
241 	.isc_admin_intrcnt = 1,
242 	.isc_vendor_info = ice_vendor_info_array,
243 	.isc_driver_version = __DECONST(char *, ice_driver_version),
244 	.isc_driver = &ice_iflib_driver,
245 
246 	/*
247 	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
248 	 * for hardware checksum offload
249 	 *
250 	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
251 	 * IP sum field, required by our hardware to calculate valid TSO
252 	 * checksums.
253 	 *
254 	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
255 	 * even when the interface is down.
256 	 *
257 	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
258 	 * vectors manually instead of relying on iflib code to do this.
259 	 */
260 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
261 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,
262 
263 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
264 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
265 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
266 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
267 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
268 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
269 };
270 
271 DRIVER_MODULE(ice, pci, ice_driver, ice_module_event_handler, NULL);
272 
273 MODULE_VERSION(ice, 1);
274 MODULE_DEPEND(ice, pci, 1, 1, 1);
275 MODULE_DEPEND(ice, ether, 1, 1, 1);
276 MODULE_DEPEND(ice, iflib, 1, 1, 1);
277 
278 IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);
279 
280 /* Static driver-wide sysctls */
281 #include "ice_iflib_sysctls.h"
282 
283 /**
284  * ice_pci_mapping - Map PCI BAR memory
285  * @sc: device private softc
286  *
287  * Map PCI BAR 0 for device operation.
288  */
289 static int
290 ice_pci_mapping(struct ice_softc *sc)
291 {
292 	int rc;
293 
294 	/* Map BAR0 */
295 	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
296 	if (rc)
297 		return rc;
298 
299 	return 0;
300 }
301 
302 /**
303  * ice_free_pci_mapping - Release PCI BAR memory
304  * @sc: device private softc
305  *
306  * Release PCI BARs which were previously mapped by ice_pci_mapping().
307  */
308 static void
309 ice_free_pci_mapping(struct ice_softc *sc)
310 {
311 	/* Free BAR0 */
312 	ice_free_bar(sc->dev, &sc->bar0);
313 }
314 
315 /*
316  * Device methods
317  */
318 
319 /**
320  * ice_register - register device method callback
321  * @dev: the device being registered
322  *
323  * Returns a pointer to the shared context structure, which is used by iflib.
324  */
325 static void *
326 ice_register(device_t dev __unused)
327 {
328 	return &ice_sctx;
329 } /* ice_register */
330 
331 /**
332  * ice_setup_scctx - Setup the iflib softc context structure
333  * @sc: the device private structure
334  *
335  * Setup the parameters in if_softc_ctx_t structure used by the iflib stack
336  * when loading.
337  */
338 static void
339 ice_setup_scctx(struct ice_softc *sc)
340 {
341 	if_softc_ctx_t scctx = sc->scctx;
342 	struct ice_hw *hw = &sc->hw;
343 	bool safe_mode, recovery_mode;
344 
345 	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
346 	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);
347 
348 	/*
349 	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
350 	 * a single queue pair.
351 	 */
352 	if (safe_mode || recovery_mode) {
353 		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
354 		scctx->isc_ntxqsets_max = 1;
355 		scctx->isc_nrxqsets_max = 1;
356 	} else {
357 		/*
358 		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
359 		 * the values of the override sysctls. Cache these initial
360 		 * values so that the driver can be aware of what the iflib
361 		 * sysctl value is when setting up MSI-X vectors.
362 		 */
363 		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
364 		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;
365 
366 		if (scctx->isc_ntxqsets == 0)
367 			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
368 		if (scctx->isc_nrxqsets == 0)
369 			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;
370 
371 		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
372 		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;
373 
374 		/*
375 		 * Sanity check that the iflib sysctl values are within the
376 		 * maximum supported range.
377 		 */
378 		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
379 			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
380 		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
381 			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
382 	}
383 
384 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
385 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
386 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
387 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
388 
389 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
390 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
391 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
392 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
393 
394 	scctx->isc_msix_bar = PCIR_BAR(ICE_MSIX_BAR);
395 	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;
396 
397 	/*
398 	 * If the driver loads in recovery mode, disable Tx/Rx functionality
399 	 */
400 	if (recovery_mode)
401 		scctx->isc_txrx = &ice_recovery_txrx;
402 	else
403 		scctx->isc_txrx = &ice_txrx;
404 
405 	/*
406 	 * If the driver loads in Safe mode or Recovery mode, disable
407 	 * advanced features including hardware offloads.
408 	 */
409 	if (safe_mode || recovery_mode) {
410 		scctx->isc_capenable = ICE_SAFE_CAPS;
411 		scctx->isc_tx_csum_flags = 0;
412 	} else {
413 		scctx->isc_capenable = ICE_FULL_CAPS;
414 		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
415 	}
416 
417 	scctx->isc_capabilities = scctx->isc_capenable;
418 } /* ice_setup_scctx */
419 
420 /**
421  * ice_if_attach_pre - Early device attach logic
422  * @ctx: the iflib context structure
423  *
424  * Called by iflib during the attach process. Earliest main driver entry
425  * point which performs necessary hardware and driver initialization. Called
426  * before the Tx and Rx queues are allocated.
427  */
428 static int
429 ice_if_attach_pre(if_ctx_t ctx)
430 {
431 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
432 	enum ice_fw_modes fw_mode;
433 	enum ice_status status;
434 	if_softc_ctx_t scctx;
435 	struct ice_hw *hw;
436 	device_t dev;
437 	int err;
438 
439 	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");
440 
441 	ice_set_state(&sc->state, ICE_STATE_ATTACHING);
442 
443 	sc->ctx = ctx;
444 	sc->media = iflib_get_media(ctx);
445 	sc->sctx = iflib_get_sctx(ctx);
446 	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);
447 
448 	dev = sc->dev = iflib_get_dev(ctx);
449 	scctx = sc->scctx = iflib_get_softc_ctx(ctx);
450 
451 	hw = &sc->hw;
452 	hw->back = sc;
453 
454 	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
455 		 "%s:admin", device_get_nameunit(dev));
456 	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
457 	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);
458 
459 	ASSERT_CTX_LOCKED(sc);
460 
461 	if (ice_pci_mapping(sc)) {
462 		err = (ENXIO);
463 		goto destroy_admin_timer;
464 	}
465 
466 	/* Save off the PCI information */
467 	ice_save_pci_info(hw, dev);
468 
469 	/* create tunables as early as possible */
470 	ice_add_device_tunables(sc);
471 
472 	/* Setup ControlQ lengths */
473 	ice_set_ctrlq_len(hw);
474 
475 reinit_hw:
476 
477 	fw_mode = ice_get_fw_mode(hw);
478 	if (fw_mode == ICE_FW_MODE_REC) {
479 		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
480 
481 		err = ice_attach_pre_recovery_mode(sc);
482 		if (err)
483 			goto free_pci_mapping;
484 
485 		return (0);
486 	}
487 
488 	/* Initialize the hw data structure */
489 	status = ice_init_hw(hw);
490 	if (status) {
491 		if (status == ICE_ERR_FW_API_VER) {
492 			/* Enter recovery mode, so that the driver remains
493 			 * loaded. This way, if the system administrator
494 			 * cannot update the driver, they may still attempt to
495 			 * downgrade the NVM.
496 			 */
497 			err = ice_attach_pre_recovery_mode(sc);
498 			if (err)
499 				goto free_pci_mapping;
500 
501 			return (0);
502 		} else {
503 			err = EIO;
504 			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
505 				      ice_status_str(status),
506 				      ice_aq_str(hw->adminq.sq_last_status));
507 		}
508 		goto free_pci_mapping;
509 	}
510 
511 	ice_init_device_features(sc);
512 
513 	/* Notify firmware of the device driver version */
514 	err = ice_send_version(sc);
515 	if (err)
516 		goto deinit_hw;
517 
518 	/*
519 	 * Success indicates a change was made that requires a reinitialization
520 	 * of the hardware
521 	 */
522 	err = ice_load_pkg_file(sc);
523 	if (err == ICE_SUCCESS) {
524 		ice_deinit_hw(hw);
525 		goto reinit_hw;
526 	}
527 
528 	err = ice_init_link_events(sc);
529 	if (err) {
530 		device_printf(dev, "ice_init_link_events failed: %s\n",
531 			      ice_err_str(err));
532 		goto deinit_hw;
533 	}
534 
535 	/* Initialize VLAN mode in FW; if dual VLAN mode is supported by the package
536 	 * and firmware, this will force them to use single VLAN mode.
537 	 */
538 	status = ice_set_vlan_mode(hw);
539 	if (status) {
540 		err = EIO;
541 		device_printf(dev, "Unable to initialize VLAN mode, err %s aq_err %s\n",
542 			      ice_status_str(status),
543 			      ice_aq_str(hw->adminq.sq_last_status));
544 		goto deinit_hw;
545 	}
546 
547 	ice_print_nvm_version(sc);
548 
549 	/* Setup the MAC address */
550 	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);
551 
552 	/* Setup the iflib softc context structure */
553 	ice_setup_scctx(sc);
554 
555 	/* Initialize the Tx queue manager */
556 	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
557 	if (err) {
558 		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
559 			      ice_err_str(err));
560 		goto deinit_hw;
561 	}
562 
563 	/* Initialize the Rx queue manager */
564 	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
565 	if (err) {
566 		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
567 			      ice_err_str(err));
568 		goto free_tx_qmgr;
569 	}
570 
571 	/* Initialize the interrupt resource manager */
572 	err = ice_alloc_intr_tracking(sc);
573 	if (err)
574 		/* Errors are already printed */
575 		goto free_rx_qmgr;
576 
577 	/* Determine maximum number of VSIs we'll prepare for */
578 	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
579 				    hw->func_caps.guar_num_vsi);
580 
581 	if (!sc->num_available_vsi) {
582 		err = EIO;
583 		device_printf(dev, "No VSIs allocated to host\n");
584 		goto free_intr_tracking;
585 	}
586 
587 	/* Allocate storage for the VSI pointers */
588 	sc->all_vsi = (struct ice_vsi **)
589 		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
590 		       M_ICE, M_WAITOK | M_ZERO);
591 	if (!sc->all_vsi) {
592 		err = ENOMEM;
593 		device_printf(dev, "Unable to allocate VSI array\n");
594 		goto free_intr_tracking;
595 	}
596 
597 	/*
598 	 * Prepare the statically allocated primary PF VSI in the softc
599 	 * structure. Other VSIs will be dynamically allocated as needed.
600 	 */
601 	ice_setup_pf_vsi(sc);
602 
603 	err = ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
604 	    scctx->isc_nrxqsets_max);
605 	if (err) {
606 		device_printf(dev, "Unable to allocate VSI Queue maps\n");
607 		goto free_main_vsi;
608 	}
609 
610 	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
611 	err = ice_allocate_msix(sc);
612 	if (err)
613 		goto free_main_vsi;
614 
615 	return 0;
616 
617 free_main_vsi:
618 	/* ice_release_vsi will free the queue maps if they were allocated */
619 	ice_release_vsi(&sc->pf_vsi);
620 	free(sc->all_vsi, M_ICE);
621 	sc->all_vsi = NULL;
622 free_intr_tracking:
623 	ice_free_intr_tracking(sc);
624 free_rx_qmgr:
625 	ice_resmgr_destroy(&sc->rx_qmgr);
626 free_tx_qmgr:
627 	ice_resmgr_destroy(&sc->tx_qmgr);
628 deinit_hw:
629 	ice_deinit_hw(hw);
630 free_pci_mapping:
631 	ice_free_pci_mapping(sc);
632 destroy_admin_timer:
633 	mtx_lock(&sc->admin_mtx);
634 	callout_stop(&sc->admin_timer);
635 	mtx_unlock(&sc->admin_mtx);
636 	mtx_destroy(&sc->admin_mtx);
637 	return err;
638 } /* ice_if_attach_pre */
639 
640 /**
641  * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
642  * @sc: the device private softc
643  *
644  * Loads the device driver in limited Firmware Recovery mode, intended to
645  * allow users to update the firmware to attempt to recover the device.
646  *
647  * @remark We may enter recovery mode in case either (a) the firmware is
648  * detected to be in an invalid state and must be re-programmed, or (b) the
649  * driver detects that the loaded firmware has a non-compatible API version
650  * that the driver cannot operate with.
651  */
652 static int
653 ice_attach_pre_recovery_mode(struct ice_softc *sc)
654 {
655 	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);
656 
657 	/* Setup the iflib softc context */
658 	ice_setup_scctx(sc);
659 
660 	/* Setup the PF VSI back pointer */
661 	sc->pf_vsi.sc = sc;
662 
663 	/*
664 	 * We still need to allocate MSI-X vectors since we need one vector to
665 	 * run the administrative admin interrupt
666 	 */
667 	return ice_allocate_msix(sc);
668 }
669 
670 /**
671  * ice_update_link_status - notify OS of link state change
672  * @sc: device private softc structure
673  * @update_media: true if we should update media even if link didn't change
674  *
675  * Called to notify iflib core of link status changes. Should be called once
676  * during attach_post, and whenever link status changes during runtime.
677  *
678  * This call only updates the currently supported media types if the link
679  * status changed, or if update_media is set to true.
680  */
681 static void
682 ice_update_link_status(struct ice_softc *sc, bool update_media)
683 {
684 	struct ice_hw *hw = &sc->hw;
685 	enum ice_status status;
686 
687 	/* Never report link up when in recovery mode */
688 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
689 		return;
690 
691 	/* Report link status to iflib only once each time it changes */
692 	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
693 		if (sc->link_up) { /* link is up */
694 			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);
695 
696 			ice_set_default_local_lldp_mib(sc);
697 
698 			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
699 			ice_rdma_link_change(sc, LINK_STATE_UP, baudrate);
700 
701 			ice_link_up_msg(sc);
702 		} else { /* link is down */
703 			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
704 			ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
705 		}
706 		update_media = true;
707 	}
708 
709 	/* Update the supported media types */
710 	if (update_media) {
711 		status = ice_add_media_types(sc, sc->media);
712 		if (status)
713 			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
714 				      ice_status_str(status),
715 				      ice_aq_str(hw->adminq.sq_last_status));
716 	}
717 }
718 
719 /**
720  * ice_if_attach_post - Late device attach logic
721  * @ctx: the iflib context structure
722  *
723  * Called by iflib to finish up attaching the device. Performs any attach
724  * logic which must wait until after the Tx and Rx queues have been
725  * allocated.
726  */
727 static int
728 ice_if_attach_post(if_ctx_t ctx)
729 {
730 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
731 	if_t ifp = iflib_get_ifp(ctx);
732 	int err;
733 
734 	ASSERT_CTX_LOCKED(sc);
735 
736 	/* We don't yet support loading if MSI-X is not supported */
737 	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
738 		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
739 		return (ENOTSUP);
740 	}
741 
742 	/* The ifnet structure hasn't yet been initialized when the attach_pre
743 	 * handler is called, so wait until attach_post to setup the
744 	 * isc_max_frame_size.
745 	 */
746 
747 	sc->ifp = ifp;
748 	sc->scctx->isc_max_frame_size = if_getmtu(ifp) +
749 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
750 
751 	/*
752 	 * If we are in recovery mode, only perform a limited subset of
753 	 * initialization to support NVM recovery.
754 	 */
755 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
756 		ice_attach_post_recovery_mode(sc);
757 		return (0);
758 	}
759 
760 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
761 
762 	err = ice_initialize_vsi(&sc->pf_vsi);
763 	if (err) {
764 		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
765 			      ice_err_str(err));
766 		return err;
767 	}
768 
769 	/* Enable FW health event reporting */
770 	ice_init_health_events(sc);
771 
772 	/* Configure the main PF VSI for RSS */
773 	err = ice_config_rss(&sc->pf_vsi);
774 	if (err) {
775 		device_printf(sc->dev,
776 			      "Unable to configure RSS for the main VSI, err %s\n",
777 			      ice_err_str(err));
778 		return err;
779 	}
780 
781 	/* Configure switch to drop transmitted LLDP and PAUSE frames */
782 	err = ice_cfg_pf_ethertype_filters(sc);
783 	if (err)
784 		return err;
785 
786 	ice_get_and_print_bus_info(sc);
787 
788 	ice_set_link_management_mode(sc);
789 
790 	ice_init_saved_phy_cfg(sc);
791 
792 	ice_cfg_pba_num(sc);
793 
794 	ice_add_device_sysctls(sc);
795 
796 	/* Get DCBX/LLDP state and start DCBX agent */
797 	ice_init_dcb_setup(sc);
798 
799 	/* Setup link configuration parameters */
800 	ice_init_link_configuration(sc);
801 	ice_update_link_status(sc, true);
802 
803 	/* Configure interrupt causes for the administrative interrupt */
804 	ice_configure_misc_interrupts(sc);
805 
806 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
807 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
808 
809 	err = ice_rdma_pf_attach(sc);
810 	if (err)
811 		return (err);
812 
813 	/* Start the admin timer */
814 	mtx_lock(&sc->admin_mtx);
815 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
816 	mtx_unlock(&sc->admin_mtx);
817 
818 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
819 
820 	return 0;
821 } /* ice_if_attach_post */
822 
823 /**
824  * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
825  * @sc: the device private softc
826  *
827  * Performs minimal work to prepare the driver to recover an NVM in case the
828  * firmware is in recovery mode.
829  */
830 static void
831 ice_attach_post_recovery_mode(struct ice_softc *sc)
832 {
833 	/* Configure interrupt causes for the administrative interrupt */
834 	ice_configure_misc_interrupts(sc);
835 
836 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
837 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
838 
839 	/* Start the admin timer */
840 	mtx_lock(&sc->admin_mtx);
841 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
842 	mtx_unlock(&sc->admin_mtx);
843 
844 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
845 }
846 
847 /**
848  * ice_free_irqvs - Free IRQ vector memory
849  * @sc: the device private softc structure
850  *
851  * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
852  */
853 static void
854 ice_free_irqvs(struct ice_softc *sc)
855 {
856 	struct ice_vsi *vsi = &sc->pf_vsi;
857 	if_ctx_t ctx = sc->ctx;
858 	int i;
859 
860 	/* If the irqvs array is NULL, then there are no vectors to free */
861 	if (sc->irqvs == NULL)
862 		return;
863 
864 	/* Free the IRQ vectors */
865 	for (i = 0; i < sc->num_irq_vectors; i++)
866 		iflib_irq_free(ctx, &sc->irqvs[i].irq);
867 
868 	/* Clear the irqv pointers */
869 	for (i = 0; i < vsi->num_rx_queues; i++)
870 		vsi->rx_queues[i].irqv = NULL;
871 
872 	for (i = 0; i < vsi->num_tx_queues; i++)
873 		vsi->tx_queues[i].irqv = NULL;
874 
875 	/* Release the vector array memory */
876 	free(sc->irqvs, M_ICE);
877 	sc->irqvs = NULL;
878 	sc->num_irq_vectors = 0;
879 }
880 
881 /**
882  * ice_if_detach - Device driver detach logic
883  * @ctx: iflib context structure
884  *
885  * Perform device shutdown logic to detach the device driver.
886  *
887  * Note that there is no guarantee of the ordering of ice_if_queues_free() and
888  * ice_if_detach(). It is possible for the functions to be called in either
889  * order, and they must not assume to have a strict ordering.
890  */
891 static int
892 ice_if_detach(if_ctx_t ctx)
893 {
894 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
895 	struct ice_vsi *vsi = &sc->pf_vsi;
896 	int i;
897 
898 	ASSERT_CTX_LOCKED(sc);
899 
900 	/* Indicate that we're detaching */
901 	ice_set_state(&sc->state, ICE_STATE_DETACHING);
902 
903 	/* Stop the admin timer */
904 	mtx_lock(&sc->admin_mtx);
905 	callout_stop(&sc->admin_timer);
906 	mtx_unlock(&sc->admin_mtx);
907 	mtx_destroy(&sc->admin_mtx);
908 
909 	ice_rdma_pf_detach(sc);
910 
911 	/* Free allocated media types */
912 	ifmedia_removeall(sc->media);
913 
914 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
915 	 * pointers. Note, the calls here and those in ice_if_queues_free()
916 	 * are *BOTH* necessary, as we cannot guarantee which path will be
917 	 * run first
918 	 */
919 	ice_vsi_del_txqs_ctx(vsi);
920 	ice_vsi_del_rxqs_ctx(vsi);
921 
922 	/* Release MSI-X resources */
923 	ice_free_irqvs(sc);
924 
925 	for (i = 0; i < sc->num_available_vsi; i++) {
926 		if (sc->all_vsi[i])
927 			ice_release_vsi(sc->all_vsi[i]);
928 	}
929 
930 	if (sc->all_vsi) {
931 		free(sc->all_vsi, M_ICE);
932 		sc->all_vsi = NULL;
933 	}
934 
935 	/* Release MSI-X memory */
936 	pci_release_msi(sc->dev);
937 
938 	if (sc->msix_table != NULL) {
939 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
940 				     rman_get_rid(sc->msix_table),
941 				     sc->msix_table);
942 		sc->msix_table = NULL;
943 	}
944 
945 	ice_free_intr_tracking(sc);
946 
947 	/* Destroy the queue managers */
948 	ice_resmgr_destroy(&sc->tx_qmgr);
949 	ice_resmgr_destroy(&sc->rx_qmgr);
950 
951 	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
952 		ice_deinit_hw(&sc->hw);
953 
954 	ice_free_pci_mapping(sc);
955 
956 	return 0;
957 } /* ice_if_detach */
958 
959 /**
960  * ice_if_tx_queues_alloc - Allocate Tx queue memory
961  * @ctx: iflib context structure
962  * @vaddrs: virtual addresses for the queue memory
963  * @paddrs: physical addresses for the queue memory
964  * @ntxqs: the number of Tx queues per set (should always be 1)
965  * @ntxqsets: the number of Tx queue sets to allocate
966  *
967  * Called by iflib to allocate Tx queues for the device. Allocates driver
968  * memory to track each queue, the status arrays used for descriptor
969  * status reporting, and Tx queue sysctls.
970  */
971 static int
972 ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
973 		       int __invariant_only ntxqs, int ntxqsets)
974 {
975 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
976 	struct ice_vsi *vsi = &sc->pf_vsi;
977 	struct ice_tx_queue *txq;
978 	int err, i, j;
979 
980 	MPASS(ntxqs == 1);
981 	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
982 	ASSERT_CTX_LOCKED(sc);
983 
984 	/* Do not bother allocating queues if we're in recovery mode */
985 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
986 		return (0);
987 
988 	/* Allocate queue structure memory */
989 	if (!(vsi->tx_queues =
990 	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
991 		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
992 		return (ENOMEM);
993 	}
994 
995 	/* Allocate report status arrays */
996 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
997 		if (!(txq->tx_rsq =
998 		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
999 			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
1000 			err = ENOMEM;
1001 			goto free_tx_queues;
1002 		}
1003 		/* Initialize report status array */
1004 		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
1005 			txq->tx_rsq[j] = QIDX_INVALID;
1006 	}
1007 
1008 	/* Assign queues from PF space to the main VSI */
1009 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
1010 	if (err) {
1011 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1012 			      ice_err_str(err));
1013 		goto free_tx_queues;
1014 	}
1015 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1016 
1017 	/* Add Tx queue sysctls context */
1018 	ice_vsi_add_txqs_ctx(vsi);
1019 
1020 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1021 		/* q_handle == me when only one TC */
1022 		txq->me = txq->q_handle = i;
1023 		txq->vsi = vsi;
1024 
1025 		/* store the queue size for easier access */
1026 		txq->desc_count = sc->scctx->isc_ntxd[0];
1027 
1028 		/* get the virtual and physical address of the hardware queues */
1029 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
1030 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
1031 		txq->tx_paddr = paddrs[i];
1032 
1033 		ice_add_txq_sysctls(txq);
1034 	}
1035 
1036 	vsi->num_tx_queues = ntxqsets;
1037 
1038 	return (0);
1039 
1040 free_tx_queues:
1041 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1042 		if (txq->tx_rsq != NULL) {
1043 			free(txq->tx_rsq, M_ICE);
1044 			txq->tx_rsq = NULL;
1045 		}
1046 	}
1047 	free(vsi->tx_queues, M_ICE);
1048 	vsi->tx_queues = NULL;
1049 	return err;
1050 }
1051 
1052 /**
1053  * ice_if_rx_queues_alloc - Allocate Rx queue memory
1054  * @ctx: iflib context structure
1055  * @vaddrs: virtual addresses for the queue memory
1056  * @paddrs: physical addresses for the queue memory
1057  * @nrxqs: number of Rx queues per set (should always be 1)
1058  * @nrxqsets: number of Rx queue sets to allocate
1059  *
1060  * Called by iflib to allocate Rx queues for the device. Allocates driver
1061  * memory to track each queue, as well as sets up the Rx queue sysctls.
1062  */
1063 static int
1064 ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1065 		       int __invariant_only nrxqs, int nrxqsets)
1066 {
1067 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1068 	struct ice_vsi *vsi = &sc->pf_vsi;
1069 	struct ice_rx_queue *rxq;
1070 	int err, i;
1071 
1072 	MPASS(nrxqs == 1);
1073 	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
1074 	ASSERT_CTX_LOCKED(sc);
1075 
1076 	/* Do not bother allocating queues if we're in recovery mode */
1077 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1078 		return (0);
1079 
1080 	/* Allocate queue structure memory */
1081 	if (!(vsi->rx_queues =
1082 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1083 		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
1084 		return (ENOMEM);
1085 	}
1086 
1087 	/* Assign queues from PF space to the main VSI */
1088 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
1089 	if (err) {
1090 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1091 			      ice_err_str(err));
1092 		goto free_rx_queues;
1093 	}
1094 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1095 
1096 	/* Add Rx queue sysctls context */
1097 	ice_vsi_add_rxqs_ctx(vsi);
1098 
1099 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
1100 		rxq->me = i;
1101 		rxq->vsi = vsi;
1102 
1103 		/* store the queue size for easier access */
1104 		rxq->desc_count = sc->scctx->isc_nrxd[0];
1105 
1106 		/* get the virtual and physical address of the hardware queues */
1107 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
1108 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
1109 		rxq->rx_paddr = paddrs[i];
1110 
1111 		ice_add_rxq_sysctls(rxq);
1112 	}
1113 
1114 	vsi->num_rx_queues = nrxqsets;
1115 
1116 	return (0);
1117 
1118 free_rx_queues:
1119 	free(vsi->rx_queues, M_ICE);
1120 	vsi->rx_queues = NULL;
1121 	return err;
1122 }
1123 
1124 /**
1125  * ice_if_queues_free - Free queue memory
1126  * @ctx: the iflib context structure
1127  *
1128  * Free queue memory allocated by ice_if_tx_queues_alloc() and
1129  * ice_if_rx_queues_alloc().
1130  *
1131  * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
1132  * called in the same order. It's possible for ice_if_queues_free() to be
1133  * called prior to ice_if_detach(), and vice versa.
1134  *
1135  * For this reason, the main VSI is a static member of the ice_softc, which is
1136  * not free'd until after iflib finishes calling both of these functions.
1137  *
1138  * Thus, care must be taken in how we manage the memory being freed by this
1139  * function, and in what tasks it can and must perform.
1140  */
1141 static void
1142 ice_if_queues_free(if_ctx_t ctx)
1143 {
1144 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1145 	struct ice_vsi *vsi = &sc->pf_vsi;
1146 	struct ice_tx_queue *txq;
1147 	int i;
1148 
1149 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
1150 	 * pointers. Note, the calls here and those in ice_if_detach()
1151 	 * are *BOTH* necessary, as we cannot guarantee which path will be
1152 	 * run first
1153 	 */
1154 	ice_vsi_del_txqs_ctx(vsi);
1155 	ice_vsi_del_rxqs_ctx(vsi);
1156 
1157 	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
1158 	ice_free_irqvs(sc);
1159 
1160 	if (vsi->tx_queues != NULL) {
1161 		/* free the tx_rsq arrays */
1162 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1163 			if (txq->tx_rsq != NULL) {
1164 				free(txq->tx_rsq, M_ICE);
1165 				txq->tx_rsq = NULL;
1166 			}
1167 		}
1168 		free(vsi->tx_queues, M_ICE);
1169 		vsi->tx_queues = NULL;
1170 		vsi->num_tx_queues = 0;
1171 	}
1172 	if (vsi->rx_queues != NULL) {
1173 		free(vsi->rx_queues, M_ICE);
1174 		vsi->rx_queues = NULL;
1175 		vsi->num_rx_queues = 0;
1176 	}
1177 }
1178 
1179 /**
1180  * ice_msix_que - Fast interrupt handler for MSI-X receive queues
1181  * @arg: The Rx queue memory
1182  *
1183  * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
1184  * an MSI-X interrupt for a given queue is triggered. Currently this just asks
1185  * iflib to schedule the main Rx thread.
1186  */
1187 static int
1188 ice_msix_que(void *arg)
1189 {
1190 	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;
1191 
1192 	/* TODO: dynamic ITR algorithm?? */
1193 
1194 	return (FILTER_SCHEDULE_THREAD);
1195 }
1196 
1197 /**
1198  * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
1199  * @arg: pointer to device softc memory
1200  *
1201  * Called by iflib when an administrative interrupt occurs. Should perform any
1202  * fast logic for handling the interrupt cause, and then indicate whether the
1203  * admin task needs to be queued.
1204  */
1205 static int
1206 ice_msix_admin(void *arg)
1207 {
1208 	struct ice_softc *sc = (struct ice_softc *)arg;
1209 	struct ice_hw *hw = &sc->hw;
1210 	device_t dev = sc->dev;
1211 	u32 oicr;
1212 
1213 	/* There is no safe way to modify the enabled miscellaneous causes of
1214 	 * the OICR vector at runtime, as doing so would be prone to race
1215 	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
1216 	 * causes and allow future interrupts to occur. The admin interrupt
1217 	 * vector will not be re-enabled until after we exit this function,
1218 	 * but any delayed tasks must be resilient against possible "late
1219 	 * arrival" interrupts that occur while we're already handling the
1220 	 * task. This is done by using state bits and serializing these
1221 	 * delayed tasks via the admin status task function.
1222 	 */
1223 	oicr = rd32(hw, PFINT_OICR);
1224 
1225 	/* Processing multiple controlq interrupts on a single vector does not
1226 	 * provide an indication of which controlq triggered the interrupt.
1227 	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
1228 	 * registers. However, the INTEVENT bit is not guaranteed to be set as
1229 	 * it gets automatically cleared when the hardware acknowledges the
1230 	 * interrupt.
1231 	 *
1232 	 * This means we don't really have a good indication of whether or
1233 	 * which controlq triggered this interrupt. We'll just notify the
1234 	 * admin task that it should check all the controlqs.
1235 	 */
1236 	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
1237 
1238 	if (oicr & PFINT_OICR_VFLR_M) {
1239 		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
1240 	}
1241 
1242 	if (oicr & PFINT_OICR_MAL_DETECT_M) {
1243 		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
1244 	}
1245 
1246 	if (oicr & PFINT_OICR_GRST_M) {
1247 		u32 reset;
1248 
1249 		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
1250 			GLGEN_RSTAT_RESET_TYPE_S;
1251 
1252 		if (reset == ICE_RESET_CORER)
1253 			sc->soft_stats.corer_count++;
1254 		else if (reset == ICE_RESET_GLOBR)
1255 			sc->soft_stats.globr_count++;
1256 		else
1257 			sc->soft_stats.empr_count++;
1258 
1259 		/* There are a couple of bits at play for handling resets.
1260 		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
1261 		 * indicate that the driver has received an OICR with a reset
1262 		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
1263 		 * happen. Second, we set hw->reset_ongoing to indicate that
1264 		 * the hardware is in reset. We will set this back to false as
1265 		 * soon as the driver has determined that the hardware is out
1266 		 * of reset.
1267 		 *
1268 		 * If the driver wishes to trigger a request, it can set one of
1269 		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
1270 		 * correct type of reset.
1271 		 */
1272 		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
1273 			hw->reset_ongoing = true;
1274 	}
1275 
1276 	if (oicr & PFINT_OICR_ECC_ERR_M) {
1277 		device_printf(dev, "ECC Error detected!\n");
1278 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1279 	}
1280 
1281 	if (oicr & PFINT_OICR_PE_CRITERR_M) {
1282 		device_printf(dev, "Critical Protocol Engine Error detected!\n");
1283 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1284 	}
1285 
1286 	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
1287 		device_printf(dev, "PCI Exception detected!\n");
1288 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1289 	}
1290 
1291 	if (oicr & PFINT_OICR_HMC_ERR_M) {
1292 		/* Log the HMC errors, but don't disable the interrupt cause */
1293 		ice_log_hmc_error(hw, dev);
1294 	}
1295 
1296 	return (FILTER_SCHEDULE_THREAD);
1297 }
1298 
1299 /**
1300  * ice_allocate_msix - Allocate MSI-X vectors for the interface
1301  * @sc: the device private softc
1302  *
1303  * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
1304  *
1305  * First, determine a suitable total number of vectors based on the number
1306  * of CPUs, RSS buckets, the administrative vector, and other demands such as
1307  * RDMA.
1308  *
1309  * Request the desired amount of vectors, and see how many we obtain. If we
1310  * don't obtain as many as desired, reduce the demands by lowering the number
1311  * of requested queues or reducing the demand from other features such as
1312  * RDMA.
1313  *
1314  * @remark This function is required because the driver sets the
1315  * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
1316  * manually.
1317  *
1318  * @remark This driver will only use MSI-X vectors. If this is not possible,
1319  * neither MSI or legacy interrupts will be tried.
1320  *
1321  * @post on success this function must set the following scctx parameters:
1322  * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
1323  *
1324  * @returns zero on success or an error code on failure.
1325  */
1326 static int
1327 ice_allocate_msix(struct ice_softc *sc)
1328 {
1329 	bool iflib_override_queue_count = false;
1330 	if_softc_ctx_t scctx = sc->scctx;
1331 	device_t dev = sc->dev;
1332 	cpuset_t cpus;
1333 	int bar, queues, vectors, requested;
1334 	int err = 0;
1335 	int rdma;
1336 
1337 	/* Allocate the MSI-X bar */
1338 	bar = scctx->isc_msix_bar;
1339 	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
1340 	if (!sc->msix_table) {
1341 		device_printf(dev, "Unable to map MSI-X table\n");
1342 		return (ENOMEM);
1343 	}
1344 
1345 	/* Check if the iflib queue count sysctls have been set */
1346 	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
1347 		iflib_override_queue_count = true;
1348 
1349 	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
1350 	if (err) {
1351 		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
1352 			      __func__, ice_err_str(err));
1353 		CPU_COPY(&all_cpus, &cpus);
1354 	}
1355 
1356 	/* Attempt to mimic behavior of iflib_msix_init */
1357 	if (iflib_override_queue_count) {
1358 		/*
1359 		 * If the override sysctls have been set, limit the queues to
1360 		 * the number of logical CPUs.
1361 		 */
1362 		queues = mp_ncpus;
1363 	} else {
1364 		/*
1365 		 * Otherwise, limit the queue count to the CPUs associated
1366 		 * with the NUMA node the device is associated with.
1367 		 */
1368 		queues = CPU_COUNT(&cpus);
1369 	}
1370 
1371 	/* Clamp to the number of RSS buckets */
1372 	queues = imin(queues, rss_getnumbuckets());
1373 
1374 	/*
1375 	 * Clamp the number of queue pairs to the minimum of the requested Tx
1376 	 * and Rx queues.
1377 	 */
1378 	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
1379 	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
1380 
1381 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) {
1382 		/*
1383 		 * Choose a number of RDMA vectors based on the number of CPUs
1384 		 * up to a maximum
1385 		 */
1386 		rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX);
1387 
1388 		/* Further limit by the user configurable tunable */
1389 		rdma = min(rdma, ice_rdma_max_msix);
1390 	} else {
1391 		rdma = 0;
1392 	}
1393 
1394 	/*
1395 	 * Determine the number of vectors to request. Note that we also need
1396 	 * to allocate one vector for administrative tasks.
1397 	 */
1398 	requested = rdma + queues + 1;
1399 
1400 	vectors = requested;
1401 
1402 	err = pci_alloc_msix(dev, &vectors);
1403 	if (err) {
1404 		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
1405 			      vectors, ice_err_str(err));
1406 		goto err_free_msix_table;
1407 	}
1408 
1409 	/* If we don't receive enough vectors, reduce demands */
1410 	if (vectors < requested) {
1411 		int diff = requested - vectors;
1412 
1413 		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
1414 			      requested, vectors);
1415 
1416 		/*
1417 		 * The OS didn't grant us the requested number of vectors.
1418 		 * Check to see if we can reduce demands by limiting the
1419 		 * number of vectors allocated to certain features.
1420 		 */
1421 
1422 		if (rdma >= diff) {
1423 			/* Reduce the number of RDMA vectors we reserve */
1424 			rdma -= diff;
1425 			diff = 0;
1426 		} else {
1427 			/* Disable RDMA and reduce the difference */
1428 			ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
1429 			diff -= rdma;
1430 			rdma = 0;
1431 		}
1432 
1433 		/*
1434 		 * If we still have a difference, we need to reduce the number
1435 		 * of queue pairs.
1436 		 *
1437 		 * However, we still need at least one vector for the admin
1438 		 * interrupt and one queue pair.
1439 		 */
1440 		if (queues <= diff) {
1441 			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
1442 			err = (ERANGE);
1443 			goto err_pci_release_msi;
1444 		}
1445 
1446 		queues -= diff;
1447 	}
1448 
1449 	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
1450 	if (rdma)
1451 		device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n",
1452 			      rdma);
1453 	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
1454 		      vectors);
1455 
1456 	scctx->isc_vectors = vectors;
1457 	scctx->isc_nrxqsets = queues;
1458 	scctx->isc_ntxqsets = queues;
1459 	scctx->isc_intr = IFLIB_INTR_MSIX;
1460 
1461 	sc->irdma_vectors = rdma;
1462 
1463 	/* Interrupt allocation tracking isn't required in recovery mode,
1464 	 * since neither RDMA nor VFs are enabled.
1465 	 */
1466 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1467 		return (0);
1468 
1469 	/* Keep track of which interrupt indices are being used for what */
1470 	sc->lan_vectors = vectors - rdma;
1471 	err = ice_resmgr_assign_contiguous(&sc->imgr, sc->pf_imap, sc->lan_vectors);
1472 	if (err) {
1473 		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
1474 			      ice_err_str(err));
1475 		goto err_pci_release_msi;
1476 	}
1477 	err = ice_resmgr_assign_contiguous(&sc->imgr, sc->rdma_imap, rdma);
1478 	if (err) {
1479 		device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n",
1480 			      ice_err_str(err));
1481 		ice_resmgr_release_map(&sc->imgr, sc->pf_imap,
1482 					    sc->lan_vectors);
1483 		goto err_pci_release_msi;
1484 	}
1485 
1486 	return (0);
1487 
1488 err_pci_release_msi:
1489 	pci_release_msi(dev);
1490 err_free_msix_table:
1491 	if (sc->msix_table != NULL) {
1492 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
1493 				rman_get_rid(sc->msix_table),
1494 				sc->msix_table);
1495 		sc->msix_table = NULL;
1496 	}
1497 
1498 	return (err);
1499 }
1500 
1501 /**
1502  * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
1503  * @ctx: the iflib context structure
1504  * @msix: the number of vectors we were assigned
1505  *
1506  * Called by iflib to assign MSI-X vectors to queues. Currently requires that
1507  * we get at least the same number of vectors as we have queues, and that we
1508  * always have the same number of Tx and Rx queues.
1509  *
1510  * Tx queues use a softirq instead of using their own hardware interrupt.
1511  */
1512 static int
1513 ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
1514 {
1515 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1516 	struct ice_vsi *vsi = &sc->pf_vsi;
1517 	int err, i, vector;
1518 
1519 	ASSERT_CTX_LOCKED(sc);
1520 
1521 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
1522 		device_printf(sc->dev,
1523 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
1524 			      vsi->num_tx_queues, vsi->num_rx_queues);
1525 		return (EOPNOTSUPP);
1526 	}
1527 
1528 	if (msix < (vsi->num_rx_queues + 1)) {
1529 		device_printf(sc->dev,
1530 			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
1531 		return (EOPNOTSUPP);
1532 	}
1533 
1534 	/* Save the number of vectors for future use */
1535 	sc->num_irq_vectors = vsi->num_rx_queues + 1;
1536 
1537 	/* Allocate space to store the IRQ vector data */
1538 	if (!(sc->irqvs =
1539 	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
1540 					       M_ICE, M_NOWAIT))) {
1541 		device_printf(sc->dev,
1542 			      "Unable to allocate irqv memory\n");
1543 		return (ENOMEM);
1544 	}
1545 
1546 	/* Administrative interrupt events will use vector 0 */
1547 	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
1548 				      ice_msix_admin, sc, 0, "admin");
1549 	if (err) {
1550 		device_printf(sc->dev,
1551 			      "Failed to register Admin queue handler: %s\n",
1552 			      ice_err_str(err));
1553 		goto free_irqvs;
1554 	}
1555 	sc->irqvs[0].me = 0;
1556 
1557 	/* Do not allocate queue interrupts when in recovery mode */
1558 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1559 		return (0);
1560 
1561 	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
1562 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1563 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1564 		int rid = vector + 1;
1565 		char irq_name[16];
1566 
1567 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
1568 		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
1569 					      IFLIB_INTR_RXTX, ice_msix_que,
1570 					      rxq, rxq->me, irq_name);
1571 		if (err) {
1572 			device_printf(sc->dev,
1573 				      "Failed to allocate q int %d err: %s\n",
1574 				      i, ice_err_str(err));
1575 			vector--;
1576 			i--;
1577 			goto fail;
1578 		}
1579 		sc->irqvs[vector].me = vector;
1580 		rxq->irqv = &sc->irqvs[vector];
1581 
1582 		bzero(irq_name, sizeof(irq_name));
1583 
1584 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
1585 		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
1586 					    IFLIB_INTR_TX, txq,
1587 					    txq->me, irq_name);
1588 		txq->irqv = &sc->irqvs[vector];
1589 	}
1590 
1591 	return (0);
1592 fail:
1593 	for (; i >= 0; i--, vector--)
1594 		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
1595 	iflib_irq_free(ctx, &sc->irqvs[0].irq);
1596 free_irqvs:
1597 	free(sc->irqvs, M_ICE);
1598 	sc->irqvs = NULL;
1599 	return err;
1600 }
1601 
1602 /**
1603  * ice_if_mtu_set - Set the device MTU
1604  * @ctx: iflib context structure
1605  * @mtu: the MTU requested
1606  *
1607  * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
1608  *
1609  * @pre assumes the caller holds the iflib CTX lock
1610  */
1611 static int
1612 ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
1613 {
1614 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1615 
1616 	ASSERT_CTX_LOCKED(sc);
1617 
1618 	/* Do not support configuration when in recovery mode */
1619 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1620 		return (ENOSYS);
1621 
1622 	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
1623 		return (EINVAL);
1624 
1625 	sc->scctx->isc_max_frame_size = mtu +
1626 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1627 
1628 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
1629 
1630 	return (0);
1631 }
1632 
1633 /**
1634  * ice_if_intr_enable - Enable device interrupts
1635  * @ctx: iflib context structure
1636  *
1637  * Called by iflib to request enabling device interrupts.
1638  */
1639 static void
1640 ice_if_intr_enable(if_ctx_t ctx)
1641 {
1642 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1643 	struct ice_vsi *vsi = &sc->pf_vsi;
1644 	struct ice_hw *hw = &sc->hw;
1645 
1646 	ASSERT_CTX_LOCKED(sc);
1647 
1648 	/* Enable ITR 0 */
1649 	ice_enable_intr(hw, sc->irqvs[0].me);
1650 
1651 	/* Do not enable queue interrupts in recovery mode */
1652 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1653 		return;
1654 
1655 	/* Enable all queue interrupts */
1656 	for (int i = 0; i < vsi->num_rx_queues; i++)
1657 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
1658 }
1659 
1660 /**
1661  * ice_if_intr_disable - Disable device interrupts
1662  * @ctx: iflib context structure
1663  *
1664  * Called by iflib to request disabling device interrupts.
1665  */
1666 static void
1667 ice_if_intr_disable(if_ctx_t ctx)
1668 {
1669 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1670 	struct ice_hw *hw = &sc->hw;
1671 	unsigned int i;
1672 
1673 	ASSERT_CTX_LOCKED(sc);
1674 
1675 	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
1676 	 * assigned to queues. Instead of assuming that the interrupt
1677 	 * assignment in the rx_queues structure is valid, just disable all
1678 	 * possible interrupts
1679 	 *
1680 	 * Note that we choose not to disable ITR 0 because this handles the
1681 	 * AdminQ interrupts, and we want to keep processing these even when
1682 	 * the interface is offline.
1683 	 */
1684 	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1685 		ice_disable_intr(hw, i);
1686 }
1687 
1688 /**
1689  * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1690  * @ctx: iflib context structure
1691  * @rxqid: the Rx queue to enable
1692  *
1693  * Enable a specific Rx queue interrupt.
1694  *
1695  * This function is not protected by the iflib CTX lock.
1696  */
1697 static int
1698 ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1699 {
1700 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1701 	struct ice_vsi *vsi = &sc->pf_vsi;
1702 	struct ice_hw *hw = &sc->hw;
1703 
1704 	/* Do not enable queue interrupts in recovery mode */
1705 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1706 		return (ENOSYS);
1707 
1708 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1709 	return (0);
1710 }
1711 
1712 /**
1713  * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1714  * @ctx: iflib context structure
1715  * @txqid: the Tx queue to enable
1716  *
1717  * Enable a specific Tx queue interrupt.
1718  *
1719  * This function is not protected by the iflib CTX lock.
1720  */
1721 static int
1722 ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1723 {
1724 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1725 	struct ice_vsi *vsi = &sc->pf_vsi;
1726 	struct ice_hw *hw = &sc->hw;
1727 
1728 	/* Do not enable queue interrupts in recovery mode */
1729 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1730 		return (ENOSYS);
1731 
1732 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1733 	return (0);
1734 }
1735 
1736 /**
1737  * ice_if_promisc_set - Set device promiscuous mode
1738  * @ctx: iflib context structure
1739  * @flags: promiscuous flags to configure
1740  *
1741  * Called by iflib to configure device promiscuous mode.
1742  *
1743  * @remark Calls to this function will always overwrite the previous setting
1744  */
1745 static int
1746 ice_if_promisc_set(if_ctx_t ctx, int flags)
1747 {
1748 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1749 	struct ice_hw *hw = &sc->hw;
1750 	device_t dev = sc->dev;
1751 	enum ice_status status;
1752 	bool promisc_enable = flags & IFF_PROMISC;
1753 	bool multi_enable = flags & IFF_ALLMULTI;
1754 
1755 	/* Do not support configuration when in recovery mode */
1756 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1757 		return (ENOSYS);
1758 
1759 	if (multi_enable)
1760 		return (EOPNOTSUPP);
1761 
1762 	if (promisc_enable) {
1763 		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1764 					     ICE_VSI_PROMISC_MASK, 0);
1765 		if (status && status != ICE_ERR_ALREADY_EXISTS) {
1766 			device_printf(dev,
1767 				      "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n",
1768 				      ice_status_str(status),
1769 				      ice_aq_str(hw->adminq.sq_last_status));
1770 			return (EIO);
1771 		}
1772 	} else {
1773 		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
1774 					       ICE_VSI_PROMISC_MASK, 0);
1775 		if (status) {
1776 			device_printf(dev,
1777 				      "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n",
1778 				      ice_status_str(status),
1779 				      ice_aq_str(hw->adminq.sq_last_status));
1780 			return (EIO);
1781 		}
1782 	}
1783 
1784 	return (0);
1785 }
1786 
1787 /**
1788  * ice_if_media_change - Change device media
1789  * @ctx: device ctx structure
1790  *
1791  * Called by iflib when a media change is requested. This operation is not
1792  * supported by the hardware, so we just return an error code.
1793  */
1794 static int
1795 ice_if_media_change(if_ctx_t ctx)
1796 {
1797 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1798 
1799 	device_printf(sc->dev, "Media change is not supported.\n");
1800 	return (ENODEV);
1801 }
1802 
1803 /**
1804  * ice_if_media_status - Report current device media
1805  * @ctx: iflib context structure
1806  * @ifmr: ifmedia request structure to update
1807  *
1808  * Updates the provided ifmr with current device media status, including link
1809  * status and media type.
1810  */
1811 static void
1812 ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1813 {
1814 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1815 	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;
1816 
1817 	ifmr->ifm_status = IFM_AVALID;
1818 	ifmr->ifm_active = IFM_ETHER;
1819 
1820 	/* Never report link up or media types when in recovery mode */
1821 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1822 		return;
1823 
1824 	if (!sc->link_up)
1825 		return;
1826 
1827 	ifmr->ifm_status |= IFM_ACTIVE;
1828 	ifmr->ifm_active |= IFM_FDX;
1829 
1830 	if (li->phy_type_low)
1831 		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
1832 	else if (li->phy_type_high)
1833 		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
1834 	else
1835 		ifmr->ifm_active |= IFM_UNKNOWN;
1836 
1837 	/* Report flow control status as well */
1838 	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
1839 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1840 	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
1841 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1842 }
1843 
1844 /**
1845  * ice_init_tx_tracking - Initialize Tx queue software tracking values
1846  * @vsi: the VSI to initialize
1847  *
1848  * Initialize Tx queue software tracking values, including the Report Status
1849  * queue, and related software tracking values.
1850  */
1851 static void
1852 ice_init_tx_tracking(struct ice_vsi *vsi)
1853 {
1854 	struct ice_tx_queue *txq;
1855 	size_t j;
1856 	int i;
1857 
1858 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1859 
1860 		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;
1861 
1862 		/* Initialize the last processed descriptor to be the end of
1863 		 * the ring, rather than the start, so that we avoid an
1864 		 * off-by-one error in ice_ift_txd_credits_update for the
1865 		 * first packet.
1866 		 */
1867 		txq->tx_cidx_processed = txq->desc_count - 1;
1868 
1869 		for (j = 0; j < txq->desc_count; j++)
1870 			txq->tx_rsq[j] = QIDX_INVALID;
1871 	}
1872 }
1873 
1874 /**
1875  * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
1876  * @sc: the device softc
1877  *
1878  * Called to update the Rx queue mbuf_sz parameter for configuring the receive
1879  * buffer sizes when programming hardware.
1880  */
1881 static void
1882 ice_update_rx_mbuf_sz(struct ice_softc *sc)
1883 {
1884 	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
1885 	struct ice_vsi *vsi = &sc->pf_vsi;
1886 
1887 	MPASS(mbuf_sz <= UINT16_MAX);
1888 	vsi->mbuf_sz = mbuf_sz;
1889 }
1890 
1891 /**
1892  * ice_if_init - Initialize the device
1893  * @ctx: iflib ctx structure
1894  *
1895  * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
1896  * device filters and prepares the Tx and Rx engines.
1897  *
1898  * @pre assumes the caller holds the iflib CTX lock
1899  */
1900 static void
1901 ice_if_init(if_ctx_t ctx)
1902 {
1903 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1904 	device_t dev = sc->dev;
1905 	int err;
1906 
1907 	ASSERT_CTX_LOCKED(sc);
1908 
1909 	/*
1910 	 * We've seen an issue with 11.3/12.1 where sideband routines are
1911 	 * called after detach is called.  This would call routines after
1912 	 * if_stop, causing issues with the teardown process.  This has
1913 	 * seemingly been fixed in STABLE snapshots, but it seems like a
1914 	 * good idea to have this guard here regardless.
1915 	 */
1916 	if (ice_driver_is_detaching(sc))
1917 		return;
1918 
1919 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1920 		return;
1921 
1922 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
1923 		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
1924 		return;
1925 	}
1926 
1927 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
1928 		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
1929 		return;
1930 	}
1931 
1932 	ice_update_rx_mbuf_sz(sc);
1933 
1934 	/* Update the MAC address... User might use a LAA */
1935 	err = ice_update_laa_mac(sc);
1936 	if (err) {
1937 		device_printf(dev,
1938 			      "LAA address change failed, err %s\n",
1939 			      ice_err_str(err));
1940 		return;
1941 	}
1942 
1943 	/* Initialize software Tx tracking values */
1944 	ice_init_tx_tracking(&sc->pf_vsi);
1945 
1946 	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
1947 	if (err) {
1948 		device_printf(dev,
1949 			      "Unable to configure the main VSI for Tx: %s\n",
1950 			      ice_err_str(err));
1951 		return;
1952 	}
1953 
1954 	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
1955 	if (err) {
1956 		device_printf(dev,
1957 			      "Unable to configure the main VSI for Rx: %s\n",
1958 			      ice_err_str(err));
1959 		goto err_cleanup_tx;
1960 	}
1961 
1962 	err = ice_control_all_rx_queues(&sc->pf_vsi, true);
1963 	if (err) {
1964 		device_printf(dev,
1965 			      "Unable to enable Rx rings for transmit: %s\n",
1966 			      ice_err_str(err));
1967 		goto err_cleanup_tx;
1968 	}
1969 
1970 	err = ice_cfg_pf_default_mac_filters(sc);
1971 	if (err) {
1972 		device_printf(dev,
1973 			      "Unable to configure default MAC filters: %s\n",
1974 			      ice_err_str(err));
1975 		goto err_stop_rx;
1976 	}
1977 
1978 	/* We use software interrupts for Tx, so we only program the hardware
1979 	 * interrupts for Rx.
1980 	 */
1981 	ice_configure_all_rxq_interrupts(&sc->pf_vsi);
1982 	ice_configure_rx_itr(&sc->pf_vsi);
1983 
1984 	/* Configure promiscuous mode */
1985 	ice_if_promisc_set(ctx, if_getflags(sc->ifp));
1986 
1987 	ice_rdma_pf_init(sc);
1988 
1989 	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
1990 	return;
1991 
1992 err_stop_rx:
1993 	ice_control_all_rx_queues(&sc->pf_vsi, false);
1994 err_cleanup_tx:
1995 	ice_vsi_disable_tx(&sc->pf_vsi);
1996 }
1997 
1998 /**
1999  * ice_poll_for_media_avail - Re-enable link if media is detected
2000  * @sc: device private structure
2001  *
2002  * Intended to be called from the driver's timer function, this function
2003  * sends the Get Link Status AQ command and re-enables HW link if the
2004  * command says that media is available.
2005  *
2006  * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing,
2007  * since media removal events are supposed to be sent to the driver through
2008  * a link status event.
2009  */
2010 static void
2011 ice_poll_for_media_avail(struct ice_softc *sc)
2012 {
2013 	struct ice_hw *hw = &sc->hw;
2014 	struct ice_port_info *pi = hw->port_info;
2015 
2016 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2017 		pi->phy.get_link_info = true;
2018 		ice_get_link_status(pi, &sc->link_up);
2019 
2020 		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2021 			enum ice_status status;
2022 
2023 			/* Re-enable link and re-apply user link settings */
2024 			ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
2025 
2026 			/* Update the OS about changes in media capability */
2027 			status = ice_add_media_types(sc, sc->media);
2028 			if (status)
2029 				device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
2030 					      ice_status_str(status),
2031 					      ice_aq_str(hw->adminq.sq_last_status));
2032 
2033 			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
2034 		}
2035 	}
2036 }
2037 
2038 /**
2039  * ice_if_timer - called by iflib periodically
2040  * @ctx: iflib ctx structure
2041  * @qid: the queue this timer was called for
2042  *
2043  * This callback is triggered by iflib periodically. We use it to update the
2044  * hw statistics.
2045  *
2046  * @remark this function is not protected by the iflib CTX lock.
2047  */
2048 static void
2049 ice_if_timer(if_ctx_t ctx, uint16_t qid)
2050 {
2051 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2052 	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;
2053 
2054 	if (qid != 0)
2055 		return;
2056 
2057 	/* Do not attempt to update stats when in recovery mode */
2058 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2059 		return;
2060 
2061 	/* Update device statistics */
2062 	ice_update_pf_stats(sc);
2063 
2064 	/*
2065 	 * For proper watchdog management, the iflib stack needs to know if
2066 	 * we've been paused during the last interval. Check if the
2067 	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
2068 	 */
2069 	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
2070 		sc->scctx->isc_pause_frames = 1;
2071 
2072 	/* Update the primary VSI stats */
2073 	ice_update_vsi_hw_stats(&sc->pf_vsi);
2074 }
2075 
2076 /**
2077  * ice_admin_timer - called periodically to trigger the admin task
2078  * @arg: callout(9) argument pointing to the device private softc structure
2079  *
2080  * Timer function used as part of a callout(9) timer that will periodically
2081  * trigger the admin task, even when the interface is down.
2082  *
2083  * @remark this function is not called by iflib and is not protected by the
2084  * iflib CTX lock.
2085  *
2086  * @remark because this is a callout function, it cannot sleep and should not
2087  * attempt taking the iflib CTX lock.
2088  */
2089 static void
2090 ice_admin_timer(void *arg)
2091 {
2092 	struct ice_softc *sc = (struct ice_softc *)arg;
2093 
2094 	/*
2095 	 * There is a point where callout routines are no longer
2096 	 * cancelable.  So there exists a window of time where the
2097 	 * driver enters detach() and tries to cancel the callout, but the
2098 	 * callout routine has passed the cancellation point.  The detach()
2099 	 * routine is unaware of this and tries to free resources that the
2100 	 * callout routine needs.  So we check for the detach state flag to
2101 	 * at least shrink the window of opportunity.
2102 	 */
2103 	if (ice_driver_is_detaching(sc))
2104 		return;
2105 
2106 	/* Fire off the admin task */
2107 	iflib_admin_intr_deferred(sc->ctx);
2108 
2109 	/* Reschedule the admin timer */
2110 	callout_schedule(&sc->admin_timer, hz/2);
2111 }
2112 
2113 /**
2114  * ice_transition_recovery_mode - Transition to recovery mode
2115  * @sc: the device private softc
2116  *
2117  * Called when the driver detects that the firmware has entered recovery mode
2118  * at run time.
2119  */
2120 static void
2121 ice_transition_recovery_mode(struct ice_softc *sc)
2122 {
2123 	struct ice_vsi *vsi = &sc->pf_vsi;
2124 	int i;
2125 
2126 	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
2127 
2128 	/* Tell the stack that the link has gone down */
2129 	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
2130 
2131 	/* Request that the device be re-initialized */
2132 	ice_request_stack_reinit(sc);
2133 
2134 	ice_rdma_pf_detach(sc);
2135 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2136 
2137 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2138 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2139 
2140 	ice_vsi_del_txqs_ctx(vsi);
2141 	ice_vsi_del_rxqs_ctx(vsi);
2142 
2143 	for (i = 0; i < sc->num_available_vsi; i++) {
2144 		if (sc->all_vsi[i])
2145 			ice_release_vsi(sc->all_vsi[i]);
2146 	}
2147 	sc->num_available_vsi = 0;
2148 
2149 	if (sc->all_vsi) {
2150 		free(sc->all_vsi, M_ICE);
2151 		sc->all_vsi = NULL;
2152 	}
2153 
2154 	/* Destroy the interrupt manager */
2155 	ice_resmgr_destroy(&sc->imgr);
2156 	/* Destroy the queue managers */
2157 	ice_resmgr_destroy(&sc->tx_qmgr);
2158 	ice_resmgr_destroy(&sc->rx_qmgr);
2159 
2160 	ice_deinit_hw(&sc->hw);
2161 }
2162 
2163 /**
2164  * ice_transition_safe_mode - Transition to safe mode
2165  * @sc: the device private softc
2166  *
2167  * Called when the driver attempts to reload the DDP package during a device
2168  * reset, and the new download fails. If so, we must transition to safe mode
2169  * at run time.
2170  *
2171  * @remark although safe mode normally allocates only a single queue, we can't
2172  * change the number of queues dynamically when using iflib. Due to this, we
2173  * do not attempt to reduce the number of queues.
2174  */
2175 static void
2176 ice_transition_safe_mode(struct ice_softc *sc)
2177 {
2178 	/* Indicate that we are in Safe mode */
2179 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
2180 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
2181 
2182 	ice_rdma_pf_detach(sc);
2183 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2184 
2185 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2186 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2187 
2188 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2189 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
2190 }
2191 
2192 /**
2193  * ice_if_update_admin_status - update admin status
2194  * @ctx: iflib ctx structure
2195  *
2196  * Called by iflib to update the admin status. For our purposes, this means
2197  * check the adminq, and update the link status. It's ultimately triggered by
2198  * our admin interrupt, or by the ice_if_timer periodically.
2199  *
2200  * @pre assumes the caller holds the iflib CTX lock
2201  */
2202 static void
2203 ice_if_update_admin_status(if_ctx_t ctx)
2204 {
2205 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2206 	enum ice_fw_modes fw_mode;
2207 	bool reschedule = false;
2208 	u16 pending = 0;
2209 
2210 	ASSERT_CTX_LOCKED(sc);
2211 
2212 	/* Check if the firmware entered recovery mode at run time */
2213 	fw_mode = ice_get_fw_mode(&sc->hw);
2214 	if (fw_mode == ICE_FW_MODE_REC) {
2215 		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2216 			/* If we just entered recovery mode, log a warning to
2217 			 * the system administrator and deinit driver state
2218 			 * that is no longer functional.
2219 			 */
2220 			ice_transition_recovery_mode(sc);
2221 		}
2222 	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
2223 		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
2224 			/* Rollback mode isn't fatal, but we don't want to
2225 			 * repeatedly post a message about it.
2226 			 */
2227 			ice_print_rollback_msg(&sc->hw);
2228 		}
2229 	}
2230 
2231 	/* Handle global reset events */
2232 	ice_handle_reset_event(sc);
2233 
2234 	/* Handle PF reset requests */
2235 	ice_handle_pf_reset_request(sc);
2236 
2237 	/* Handle MDD events */
2238 	ice_handle_mdd_event(sc);
2239 
2240 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
2241 	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
2242 	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2243 		/*
2244 		 * If we know the control queues are disabled, skip processing
2245 		 * the control queues entirely.
2246 		 */
2247 		;
2248 	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
2249 		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
2250 		if (pending > 0)
2251 			reschedule = true;
2252 
2253 		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
2254 		if (pending > 0)
2255 			reschedule = true;
2256 	}
2257 
2258 	/* Poll for link up */
2259 	ice_poll_for_media_avail(sc);
2260 
2261 	/* Check and update link status */
2262 	ice_update_link_status(sc, false);
2263 
2264 	/*
2265 	 * If there are still messages to process, we need to reschedule
2266 	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
2267 	 * woken up at the next interrupt or timer event.
2268 	 */
2269 	if (reschedule) {
2270 		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
2271 		iflib_admin_intr_deferred(ctx);
2272 	} else {
2273 		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2274 	}
2275 }
2276 
2277 /**
2278  * ice_prepare_for_reset - Prepare device for an impending reset
2279  * @sc: The device private softc
2280  *
2281  * Prepare the driver for an impending reset, shutting down VSIs, clearing the
2282  * scheduler setup, and shutting down controlqs. Uses the
2283  * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
2284  * driver for reset or not.
2285  */
2286 static void
2287 ice_prepare_for_reset(struct ice_softc *sc)
2288 {
2289 	struct ice_hw *hw = &sc->hw;
2290 
2291 	/* If we're already prepared, there's nothing to do */
2292 	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
2293 		return;
2294 
2295 	log(LOG_INFO, "%s: preparing to reset device logic\n", if_name(sc->ifp));
2296 
2297 	/* In recovery mode, hardware is not initialized */
2298 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2299 		return;
2300 
2301 	/* stop the RDMA client */
2302 	ice_rdma_pf_stop(sc);
2303 
2304 	/* Release the main PF VSI queue mappings */
2305 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2306 				    sc->pf_vsi.num_tx_queues);
2307 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2308 				    sc->pf_vsi.num_rx_queues);
2309 
2310 	ice_clear_hw_tbls(hw);
2311 
2312 	if (hw->port_info)
2313 		ice_sched_clear_port(hw->port_info);
2314 
2315 	ice_shutdown_all_ctrlq(hw, false);
2316 }
2317 
2318 /**
2319  * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
2320  * @sc: the device softc pointer
2321  *
2322  * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
2323  * mapping after a reset occurred.
2324  */
2325 static int
2326 ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
2327 {
2328 	struct ice_vsi *vsi = &sc->pf_vsi;
2329 	struct ice_tx_queue *txq;
2330 	struct ice_rx_queue *rxq;
2331 	int err, i;
2332 
2333 	/* Re-assign Tx queues from PF space to the main VSI */
2334 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
2335 					    vsi->num_tx_queues);
2336 	if (err) {
2337 		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
2338 			      ice_err_str(err));
2339 		return (err);
2340 	}
2341 
2342 	/* Re-assign Rx queues from PF space to this VSI */
2343 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
2344 					    vsi->num_rx_queues);
2345 	if (err) {
2346 		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
2347 			      ice_err_str(err));
2348 		goto err_release_tx_queues;
2349 	}
2350 
2351 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
2352 
2353 	/* Re-assign Tx queue tail pointers */
2354 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
2355 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
2356 
2357 	/* Re-assign Rx queue tail pointers */
2358 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
2359 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
2360 
2361 	return (0);
2362 
2363 err_release_tx_queues:
2364 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2365 				   sc->pf_vsi.num_tx_queues);
2366 
2367 	return (err);
2368 }
2369 
/* Non-zero when the interface behind the iflib context is marked running */
#define CTX_ACTIVE(ctx) (if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
2372 
2373 /**
2374  * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
2375  * @sc: The device private softc
2376  *
2377  * Handle a driver rebuild while in recovery mode. This will only rebuild the
2378  * limited functionality supported while in recovery mode.
2379  */
2380 static void
2381 ice_rebuild_recovery_mode(struct ice_softc *sc)
2382 {
2383 	device_t dev = sc->dev;
2384 
2385 	/* enable PCIe bus master */
2386 	pci_enable_busmaster(dev);
2387 
2388 	/* Configure interrupt causes for the administrative interrupt */
2389 	ice_configure_misc_interrupts(sc);
2390 
2391 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2392 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2393 
2394 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2395 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2396 
2397 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2398 
2399 	/* In order to completely restore device functionality, the iflib core
2400 	 * needs to be reset. We need to request an iflib reset. Additionally,
2401 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2402 	 * the iflib core, we also want re-run the admin task so that iflib
2403 	 * resets immediately instead of waiting for the next interrupt.
2404 	 */
2405 	ice_request_stack_reinit(sc);
2406 
2407 	return;
2408 }
2409 
2410 /**
2411  * ice_rebuild - Rebuild driver state post reset
2412  * @sc: The device private softc
2413  *
2414  * Restore driver state after a reset occurred. Restart the controlqs, setup
2415  * the hardware port, and re-enable the VSIs.
2416  */
2417 static void
2418 ice_rebuild(struct ice_softc *sc)
2419 {
2420 	struct ice_hw *hw = &sc->hw;
2421 	device_t dev = sc->dev;
2422 	enum ice_ddp_state pkg_state;
2423 	enum ice_status status;
2424 	int err;
2425 
2426 	sc->rebuild_ticks = ticks;
2427 
2428 	/* If we're rebuilding, then a reset has succeeded. */
2429 	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);
2430 
2431 	/*
2432 	 * If the firmware is in recovery mode, only restore the limited
2433 	 * functionality supported by recovery mode.
2434 	 */
2435 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2436 		ice_rebuild_recovery_mode(sc);
2437 		return;
2438 	}
2439 
2440 	/* enable PCIe bus master */
2441 	pci_enable_busmaster(dev);
2442 
2443 	status = ice_init_all_ctrlq(hw);
2444 	if (status) {
2445 		device_printf(dev, "failed to re-init controlqs, err %s\n",
2446 			      ice_status_str(status));
2447 		goto err_shutdown_ctrlq;
2448 	}
2449 
2450 	/* Query the allocated resources for Tx scheduler */
2451 	status = ice_sched_query_res_alloc(hw);
2452 	if (status) {
2453 		device_printf(dev,
2454 			      "Failed to query scheduler resources, err %s aq_err %s\n",
2455 			      ice_status_str(status),
2456 			      ice_aq_str(hw->adminq.sq_last_status));
2457 		goto err_shutdown_ctrlq;
2458 	}
2459 
2460 	/* Re-enable FW logging. Keep going even if this fails */
2461 	status = ice_fwlog_set(hw, &hw->fwlog_cfg);
2462 	if (!status) {
2463 		/*
2464 		 * We should have the most updated cached copy of the
2465 		 * configuration, regardless of whether we're rebuilding
2466 		 * or not.  So we'll simply check to see if logging was
2467 		 * enabled pre-rebuild.
2468 		 */
2469 		if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2470 			status = ice_fwlog_register(hw);
2471 			if (status)
2472 				device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n",
2473 				   ice_status_str(status),
2474 				   ice_aq_str(hw->adminq.sq_last_status));
2475 		}
2476 	} else
2477 		device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n",
2478 		   ice_status_str(status),
2479 		   ice_aq_str(hw->adminq.sq_last_status));
2480 
2481 	err = ice_send_version(sc);
2482 	if (err)
2483 		goto err_shutdown_ctrlq;
2484 
2485 	err = ice_init_link_events(sc);
2486 	if (err) {
2487 		device_printf(dev, "ice_init_link_events failed: %s\n",
2488 			      ice_err_str(err));
2489 		goto err_shutdown_ctrlq;
2490 	}
2491 
2492 	status = ice_clear_pf_cfg(hw);
2493 	if (status) {
2494 		device_printf(dev, "failed to clear PF configuration, err %s\n",
2495 			      ice_status_str(status));
2496 		goto err_shutdown_ctrlq;
2497 	}
2498 
2499 	ice_clear_pxe_mode(hw);
2500 
2501 	status = ice_get_caps(hw);
2502 	if (status) {
2503 		device_printf(dev, "failed to get capabilities, err %s\n",
2504 			      ice_status_str(status));
2505 		goto err_shutdown_ctrlq;
2506 	}
2507 
2508 	status = ice_sched_init_port(hw->port_info);
2509 	if (status) {
2510 		device_printf(dev, "failed to initialize port, err %s\n",
2511 			      ice_status_str(status));
2512 		goto err_sched_cleanup;
2513 	}
2514 
2515 	/* If we previously loaded the package, it needs to be reloaded now */
2516 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
2517 		pkg_state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
2518 		if (!ice_is_init_pkg_successful(pkg_state)) {
2519 			ice_log_pkg_init(sc, pkg_state);
2520 			ice_transition_safe_mode(sc);
2521 		}
2522 	}
2523 
2524 	ice_reset_pf_stats(sc);
2525 
2526 	err = ice_rebuild_pf_vsi_qmap(sc);
2527 	if (err) {
2528 		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
2529 			      ice_err_str(err));
2530 		goto err_sched_cleanup;
2531 	}
2532 	err = ice_initialize_vsi(&sc->pf_vsi);
2533 	if (err) {
2534 		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
2535 			      ice_err_str(err));
2536 		goto err_release_queue_allocations;
2537 	}
2538 
2539 	/* Replay all VSI configuration */
2540 	err = ice_replay_all_vsi_cfg(sc);
2541 	if (err)
2542 		goto err_deinit_pf_vsi;
2543 
2544 	/* Re-enable FW health event reporting */
2545 	ice_init_health_events(sc);
2546 
2547 	/* Reconfigure the main PF VSI for RSS */
2548 	err = ice_config_rss(&sc->pf_vsi);
2549 	if (err) {
2550 		device_printf(sc->dev,
2551 			      "Unable to reconfigure RSS for the main VSI, err %s\n",
2552 			      ice_err_str(err));
2553 		goto err_deinit_pf_vsi;
2554 	}
2555 
2556 	/* Refresh link status */
2557 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2558 	sc->hw.port_info->phy.get_link_info = true;
2559 	ice_get_link_status(sc->hw.port_info, &sc->link_up);
2560 	ice_update_link_status(sc, true);
2561 
2562 	/* RDMA interface will be restarted by the stack re-init */
2563 
2564 	/* Configure interrupt causes for the administrative interrupt */
2565 	ice_configure_misc_interrupts(sc);
2566 
2567 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2568 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2569 
2570 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2571 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2572 
2573 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2574 
2575 	/* In order to completely restore device functionality, the iflib core
2576 	 * needs to be reset. We need to request an iflib reset. Additionally,
2577 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2578 	 * the iflib core, we also want re-run the admin task so that iflib
2579 	 * resets immediately instead of waiting for the next interrupt.
2580 	 */
2581 	ice_request_stack_reinit(sc);
2582 
2583 	return;
2584 
2585 err_deinit_pf_vsi:
2586 	ice_deinit_vsi(&sc->pf_vsi);
2587 err_release_queue_allocations:
2588 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2589 				    sc->pf_vsi.num_tx_queues);
2590 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2591 				    sc->pf_vsi.num_rx_queues);
2592 err_sched_cleanup:
2593 	ice_sched_cleanup_all(hw);
2594 err_shutdown_ctrlq:
2595 	ice_shutdown_all_ctrlq(hw, false);
2596 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2597 	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2598 	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
2599 }
2600 
2601 /**
2602  * ice_handle_reset_event - Handle reset events triggered by OICR
2603  * @sc: The device private softc
2604  *
2605  * Handle reset events triggered by an OICR notification. This includes CORER,
2606  * GLOBR, and EMPR resets triggered by software on this or any other PF or by
2607  * firmware.
2608  *
2609  * @pre assumes the iflib context lock is held, and will unlock it while
2610  * waiting for the hardware to finish reset.
2611  */
2612 static void
2613 ice_handle_reset_event(struct ice_softc *sc)
2614 {
2615 	struct ice_hw *hw = &sc->hw;
2616 	enum ice_status status;
2617 	device_t dev = sc->dev;
2618 
2619 	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
2620 	 * trigger an OICR interrupt. Our OICR handler will determine when
2621 	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
2622 	 * appropriate.
2623 	 */
2624 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
2625 		return;
2626 
2627 	ice_prepare_for_reset(sc);
2628 
2629 	/*
2630 	 * Release the iflib context lock and wait for the device to finish
2631 	 * resetting.
2632 	 */
2633 	IFLIB_CTX_UNLOCK(sc);
2634 	status = ice_check_reset(hw);
2635 	IFLIB_CTX_LOCK(sc);
2636 	if (status) {
2637 		device_printf(dev, "Device never came out of reset, err %s\n",
2638 			      ice_status_str(status));
2639 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2640 		return;
2641 	}
2642 
2643 	/* We're done with the reset, so we can rebuild driver state */
2644 	sc->hw.reset_ongoing = false;
2645 	ice_rebuild(sc);
2646 
2647 	/* In the unlikely event that a PF reset request occurs at the same
2648 	 * time as a global reset, clear the request now. This avoids
2649 	 * resetting a second time right after we reset due to a global event.
2650 	 */
2651 	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2652 		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
2653 }
2654 
2655 /**
2656  * ice_handle_pf_reset_request - Initiate PF reset requested by software
2657  * @sc: The device private softc
2658  *
2659  * Initiate a PF reset requested by software. We handle this in the admin task
2660  * so that only one thread actually handles driver preparation and cleanup,
2661  * rather than having multiple threads possibly attempt to run this code
2662  * simultaneously.
2663  *
2664  * @pre assumes the iflib context lock is held and will unlock it while
2665  * waiting for the PF reset to complete.
2666  */
2667 static void
2668 ice_handle_pf_reset_request(struct ice_softc *sc)
2669 {
2670 	struct ice_hw *hw = &sc->hw;
2671 	enum ice_status status;
2672 
2673 	/* Check for PF reset requests */
2674 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2675 		return;
2676 
2677 	/* Make sure we're prepared for reset */
2678 	ice_prepare_for_reset(sc);
2679 
2680 	/*
2681 	 * Release the iflib context lock and wait for the device to finish
2682 	 * resetting.
2683 	 */
2684 	IFLIB_CTX_UNLOCK(sc);
2685 	status = ice_reset(hw, ICE_RESET_PFR);
2686 	IFLIB_CTX_LOCK(sc);
2687 	if (status) {
2688 		device_printf(sc->dev, "device PF reset failed, err %s\n",
2689 			      ice_status_str(status));
2690 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2691 		return;
2692 	}
2693 
2694 	sc->soft_stats.pfr_count++;
2695 	ice_rebuild(sc);
2696 }
2697 
2698 /**
2699  * ice_init_device_features - Init device driver features
2700  * @sc: driver softc structure
2701  *
2702  * @pre assumes that the function capabilities bits have been set up by
2703  * ice_init_hw().
2704  */
2705 static void
2706 ice_init_device_features(struct ice_softc *sc)
2707 {
2708 	/* Set capabilities that all devices support */
2709 	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2710 	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
2711 	ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2712 	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
2713 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
2714 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
2715 	ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2716 	ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2717 	ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap);
2718 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_cap);
2719 	ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2720 
2721 	/* Disable features due to hardware limitations... */
2722 	if (!sc->hw.func_caps.common_cap.rss_table_size)
2723 		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2724 	if (!sc->hw.func_caps.common_cap.iwarp || !ice_enable_irdma)
2725 		ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2726 	if (!sc->hw.func_caps.common_cap.dcb)
2727 		ice_clear_bit(ICE_FEATURE_DCB, sc->feat_cap);
2728 	/* Disable features due to firmware limitations... */
2729 	if (!ice_is_fw_health_report_supported(&sc->hw))
2730 		ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2731 	if (!ice_fwlog_supported(&sc->hw))
2732 		ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2733 	if (sc->hw.fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2734 		if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING))
2735 			ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en);
2736 		else
2737 			ice_fwlog_unregister(&sc->hw);
2738 	}
2739 
2740 	/* Disable capabilities not supported by the OS */
2741 	ice_disable_unsupported_features(sc->feat_cap);
2742 
2743 	/* RSS is always enabled for iflib */
2744 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
2745 		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
2746 
2747 	/* Disable features based on sysctl settings */
2748 	if (!ice_tx_balance_en)
2749 		ice_clear_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2750 }
2751 
2752 /**
2753  * ice_if_multi_set - Callback to update Multicast filters in HW
2754  * @ctx: iflib ctx structure
2755  *
2756  * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
2757  * the if_multiaddrs list and determine which filters have been added or
2758  * removed from the list, and update HW programming to reflect the new list.
2759  *
2760  * @pre assumes the caller holds the iflib CTX lock
2761  */
2762 static void
2763 ice_if_multi_set(if_ctx_t ctx)
2764 {
2765 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2766 	int err;
2767 
2768 	ASSERT_CTX_LOCKED(sc);
2769 
2770 	/* Do not handle multicast configuration in recovery mode */
2771 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2772 		return;
2773 
2774 	err = ice_sync_multicast_filters(sc);
2775 	if (err) {
2776 		device_printf(sc->dev,
2777 			      "Failed to synchronize multicast filter list: %s\n",
2778 			      ice_err_str(err));
2779 		return;
2780 	}
2781 }
2782 
2783 /**
2784  * ice_if_vlan_register - Register a VLAN with the hardware
2785  * @ctx: iflib ctx pointer
2786  * @vtag: VLAN to add
2787  *
2788  * Programs the main PF VSI with a hardware filter for the given VLAN.
2789  *
2790  * @pre assumes the caller holds the iflib CTX lock
2791  */
2792 static void
2793 ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
2794 {
2795 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2796 	enum ice_status status;
2797 
2798 	ASSERT_CTX_LOCKED(sc);
2799 
2800 	/* Do not handle VLAN configuration in recovery mode */
2801 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2802 		return;
2803 
2804 	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
2805 	if (status) {
2806 		device_printf(sc->dev,
2807 			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
2808 			      vtag, ice_status_str(status),
2809 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2810 	}
2811 }
2812 
2813 /**
2814  * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
2815  * @ctx: iflib ctx pointer
2816  * @vtag: VLAN to add
2817  *
2818  * Removes the previously programmed VLAN filter from the main PF VSI.
2819  *
2820  * @pre assumes the caller holds the iflib CTX lock
2821  */
2822 static void
2823 ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
2824 {
2825 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2826 	enum ice_status status;
2827 
2828 	ASSERT_CTX_LOCKED(sc);
2829 
2830 	/* Do not handle VLAN configuration in recovery mode */
2831 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2832 		return;
2833 
2834 	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
2835 	if (status) {
2836 		device_printf(sc->dev,
2837 			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
2838 			      vtag, ice_status_str(status),
2839 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2840 	}
2841 }
2842 
2843 /**
2844  * ice_if_stop - Stop the device
2845  * @ctx: iflib context structure
2846  *
2847  * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
2848  * down)
2849  *
2850  * @pre assumes the caller holds the iflib CTX lock
2851  */
2852 static void
2853 ice_if_stop(if_ctx_t ctx)
2854 {
2855 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2856 
2857 	ASSERT_CTX_LOCKED(sc);
2858 
2859 	/*
2860 	 * The iflib core may call IFDI_STOP prior to the first call to
2861 	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
2862 	 * don't have, and disable Tx queues which aren't yet configured.
2863 	 * Although it is likely these extra operations are harmless, they do
2864 	 * cause spurious warning messages to be displayed, which may confuse
2865 	 * users.
2866 	 *
2867 	 * To avoid these messages, we use a state bit indicating if we've
2868 	 * been initialized. It will be set when ice_if_init is called, and
2869 	 * cleared here in ice_if_stop.
2870 	 */
2871 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
2872 		return;
2873 
2874 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
2875 		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
2876 		return;
2877 	}
2878 
2879 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
2880 		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
2881 		return;
2882 	}
2883 
2884 	ice_rdma_pf_stop(sc);
2885 
2886 	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
2887 	 * return of these functions because there's nothing we can really do
2888 	 * if they fail, and the functions already print error messages.
2889 	 * Just try to shut down as much as we can.
2890 	 */
2891 	ice_rm_pf_default_mac_filters(sc);
2892 
2893 	/* Dissociate the Tx and Rx queues from the interrupts */
2894 	ice_flush_txq_interrupts(&sc->pf_vsi);
2895 	ice_flush_rxq_interrupts(&sc->pf_vsi);
2896 
2897 	/* Disable the Tx and Rx queues */
2898 	ice_vsi_disable_tx(&sc->pf_vsi);
2899 	ice_control_all_rx_queues(&sc->pf_vsi, false);
2900 }
2901 
2902 /**
2903  * ice_if_get_counter - Get current value of an ifnet statistic
2904  * @ctx: iflib context pointer
2905  * @counter: ifnet counter to read
2906  *
2907  * Reads the current value of an ifnet counter for the device.
2908  *
2909  * This function is not protected by the iflib CTX lock.
2910  */
2911 static uint64_t
2912 ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
2913 {
2914 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2915 
2916 	/* Return the counter for the main PF VSI */
2917 	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
2918 }
2919 
2920 /**
2921  * ice_request_stack_reinit - Request that iflib re-initialize
2922  * @sc: the device private softc
2923  *
2924  * Request that the device be brought down and up, to re-initialize. For
2925  * example, this may be called when a device reset occurs, or when Tx and Rx
2926  * queues need to be re-initialized.
2927  *
2928  * This is required because the iflib state is outside the driver, and must be
2929  * re-initialized if we need to resart Tx and Rx queues.
2930  */
2931 void
2932 ice_request_stack_reinit(struct ice_softc *sc)
2933 {
2934 	if (CTX_ACTIVE(sc->ctx)) {
2935 		iflib_request_reset(sc->ctx);
2936 		iflib_admin_intr_deferred(sc->ctx);
2937 	}
2938 }
2939 
2940 /**
2941  * ice_driver_is_detaching - Check if the driver is detaching/unloading
2942  * @sc: device private softc
2943  *
2944  * Returns true if the driver is detaching, false otherwise.
2945  *
2946  * @remark on newer kernels, take advantage of iflib_in_detach in order to
2947  * report detachment correctly as early as possible.
2948  *
2949  * @remark this function is used by various code paths that want to avoid
2950  * running if the driver is about to be removed. This includes sysctls and
2951  * other driver access points. Note that it does not fully resolve
2952  * detach-based race conditions as it is possible for a thread to race with
2953  * iflib_in_detach.
2954  */
2955 bool
2956 ice_driver_is_detaching(struct ice_softc *sc)
2957 {
2958 	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
2959 		iflib_in_detach(sc->ctx));
2960 }
2961 
2962 /**
2963  * ice_if_priv_ioctl - Device private ioctl handler
2964  * @ctx: iflib context pointer
2965  * @command: The ioctl command issued
2966  * @data: ioctl specific data
2967  *
2968  * iflib callback for handling custom driver specific ioctls.
2969  *
2970  * @pre Assumes that the iflib context lock is held.
2971  */
2972 static int
2973 ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
2974 {
2975 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2976 	struct ifdrv *ifd;
2977 	device_t dev = sc->dev;
2978 
2979 	if (data == NULL)
2980 		return (EINVAL);
2981 
2982 	ASSERT_CTX_LOCKED(sc);
2983 
2984 	/* Make sure the command type is valid */
2985 	switch (command) {
2986 	case SIOCSDRVSPEC:
2987 	case SIOCGDRVSPEC:
2988 		/* Accepted commands */
2989 		break;
2990 	case SIOCGPRIVATE_0:
2991 		/*
2992 		 * Although we do not support this ioctl command, it's
2993 		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
2994 		 * handler. Do not print a message in this case
2995 		 */
2996 		return (ENOTSUP);
2997 	default:
2998 		/*
2999 		 * If we get a different command for this function, it's
3000 		 * definitely unexpected, so log a message indicating what
3001 		 * command we got for debugging purposes.
3002 		 */
3003 		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
3004 			      __func__, command);
3005 		return (EINVAL);
3006 	}
3007 
3008 	ifd = (struct ifdrv *)data;
3009 
3010 	switch (ifd->ifd_cmd) {
3011 	case ICE_NVM_ACCESS:
3012 		return ice_handle_nvm_access_ioctl(sc, ifd);
3013 	case ICE_DEBUG_DUMP:
3014 		return ice_handle_debug_dump_ioctl(sc, ifd);
3015 	default:
3016 		return EINVAL;
3017 	}
3018 }
3019 
3020 /**
3021  * ice_if_i2c_req - I2C request handler for iflib
3022  * @ctx: iflib context pointer
3023  * @req: The I2C parameters to use
3024  *
3025  * Read from the port's I2C eeprom using the parameters from the ioctl.
3026  *
3027  * @remark The iflib-only part is pretty simple.
3028  */
3029 static int
3030 ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
3031 {
3032 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3033 
3034 	return ice_handle_i2c_req(sc, req);
3035 }
3036 
3037 /**
3038  * ice_if_suspend - PCI device suspend handler for iflib
3039  * @ctx: iflib context pointer
3040  *
3041  * Deinitializes the driver and clears HW resources in preparation for
3042  * suspend or an FLR.
3043  *
3044  * @returns 0; this return value is ignored
3045  */
3046 static int
3047 ice_if_suspend(if_ctx_t ctx)
3048 {
3049 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3050 
3051 	/* At least a PFR is always going to happen after this;
3052 	 * either via FLR or during the D3->D0 transition.
3053 	 */
3054 	ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
3055 
3056 	ice_prepare_for_reset(sc);
3057 
3058 	return (0);
3059 }
3060 
3061 /**
3062  * ice_if_resume - PCI device resume handler for iflib
3063  * @ctx: iflib context pointer
3064  *
3065  * Reinitializes the driver and the HW after PCI resume or after
3066  * an FLR. An init is performed by iflib after this function is finished.
3067  *
3068  * @returns 0; this return value is ignored
3069  */
3070 static int
3071 ice_if_resume(if_ctx_t ctx)
3072 {
3073 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3074 
3075 	ice_rebuild(sc);
3076 
3077 	return (0);
3078 }
3079 
3080