xref: /freebsd/sys/dev/ice/if_ice_iflib.c (revision 2008043f386721d58158e37e0d7e50df8095942d)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2023, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file if_ice_iflib.c
34  * @brief iflib driver implementation
35  *
36  * Contains the main entry point for the iflib driver implementation. It
37  * implements the various ifdi driver methods, and sets up the module and
38  * driver values to load an iflib driver.
39  */
40 
41 #include "ice_iflib.h"
42 #include "ice_drv_info.h"
43 #include "ice_switch.h"
44 #include "ice_sched.h"
45 
46 #include <sys/module.h>
47 #include <sys/sockio.h>
48 #include <sys/smp.h>
49 #include <dev/pci/pcivar.h>
50 #include <dev/pci/pcireg.h>
51 
52 /*
53  * Device method prototypes
54  */
55 
56 static void *ice_register(device_t);
57 static int  ice_if_attach_pre(if_ctx_t);
58 static int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
59 static int  ice_if_attach_post(if_ctx_t);
60 static void ice_attach_post_recovery_mode(struct ice_softc *sc);
61 static int  ice_if_detach(if_ctx_t);
62 static int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
63 static int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets);
64 static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
65 static void ice_if_queues_free(if_ctx_t ctx);
66 static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
67 static void ice_if_intr_enable(if_ctx_t ctx);
68 static void ice_if_intr_disable(if_ctx_t ctx);
69 static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
70 static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
71 static int ice_if_promisc_set(if_ctx_t ctx, int flags);
72 static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
73 static int ice_if_media_change(if_ctx_t ctx);
74 static void ice_if_init(if_ctx_t ctx);
75 static void ice_if_timer(if_ctx_t ctx, uint16_t qid);
76 static void ice_if_update_admin_status(if_ctx_t ctx);
77 static void ice_if_multi_set(if_ctx_t ctx);
78 static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
79 static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
80 static void ice_if_stop(if_ctx_t ctx);
81 static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
82 static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
83 static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
84 static int ice_if_suspend(if_ctx_t ctx);
85 static int ice_if_resume(if_ctx_t ctx);
86 static bool ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event);
87 
88 static int ice_msix_que(void *arg);
89 static int ice_msix_admin(void *arg);
90 
91 /*
92  * Helper function prototypes
93  */
94 static int ice_pci_mapping(struct ice_softc *sc);
95 static void ice_free_pci_mapping(struct ice_softc *sc);
96 static void ice_update_link_status(struct ice_softc *sc, bool update_media);
97 static void ice_init_device_features(struct ice_softc *sc);
98 static void ice_init_tx_tracking(struct ice_vsi *vsi);
99 static void ice_handle_reset_event(struct ice_softc *sc);
100 static void ice_handle_pf_reset_request(struct ice_softc *sc);
101 static void ice_prepare_for_reset(struct ice_softc *sc);
102 static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
103 static void ice_rebuild(struct ice_softc *sc);
104 static void ice_rebuild_recovery_mode(struct ice_softc *sc);
105 static void ice_free_irqvs(struct ice_softc *sc);
106 static void ice_update_rx_mbuf_sz(struct ice_softc *sc);
107 static void ice_poll_for_media_avail(struct ice_softc *sc);
108 static void ice_setup_scctx(struct ice_softc *sc);
109 static int ice_allocate_msix(struct ice_softc *sc);
110 static void ice_admin_timer(void *arg);
111 static void ice_transition_recovery_mode(struct ice_softc *sc);
112 static void ice_transition_safe_mode(struct ice_softc *sc);
113 static void ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask);
114 
115 /*
116  * Device Interface Declaration
117  */
118 
119 /**
120  * @var ice_methods
121  * @brief ice driver method entry points
122  *
123  * List of device methods implementing the generic device interface used by
124  * the device stack to interact with the ice driver. Since this is an iflib
125  * driver, most of the methods point to the generic iflib implementation.
126  */
127 static device_method_t ice_methods[] = {
128 	/* Device interface */
129 	DEVMETHOD(device_register, ice_register),
130 	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
131 	DEVMETHOD(device_attach,   iflib_device_attach),
132 	DEVMETHOD(device_detach,   iflib_device_detach),
133 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
134 	DEVMETHOD(device_suspend,  iflib_device_suspend),
135 	DEVMETHOD(device_resume,   iflib_device_resume),
136 	DEVMETHOD_END
137 };
138 
139 /**
140  * @var ice_iflib_methods
141  * @brief iflib method entry points
142  *
143  * List of device methods used by the iflib stack to interact with this
144  * driver. These are the real main entry points used to interact with this
145  * driver.
146  */
147 static device_method_t ice_iflib_methods[] = {
148 	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
149 	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
150 	DEVMETHOD(ifdi_detach, ice_if_detach),
151 	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
152 	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
153 	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
154 	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
155 	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
156 	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
157 	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
158 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
159 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
160 	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
161 	DEVMETHOD(ifdi_media_status, ice_if_media_status),
162 	DEVMETHOD(ifdi_media_change, ice_if_media_change),
163 	DEVMETHOD(ifdi_init, ice_if_init),
164 	DEVMETHOD(ifdi_stop, ice_if_stop),
165 	DEVMETHOD(ifdi_timer, ice_if_timer),
166 	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
167 	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
168 	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
169 	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
170 	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
171 	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
172 	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
173 	DEVMETHOD(ifdi_suspend, ice_if_suspend),
174 	DEVMETHOD(ifdi_resume, ice_if_resume),
175 	DEVMETHOD(ifdi_needs_restart, ice_if_needs_restart),
176 	DEVMETHOD_END
177 };
178 
179 /**
180  * @var ice_driver
181  * @brief driver structure for the generic device stack
182  *
183  * driver_t definition used to setup the generic device methods.
184  */
185 static driver_t ice_driver = {
186 	.name = "ice",
187 	.methods = ice_methods,
188 	.size = sizeof(struct ice_softc),
189 };
190 
191 /**
192  * @var ice_iflib_driver
193  * @brief driver structure for the iflib stack
194  *
195  * driver_t definition used to setup the iflib device methods.
196  */
197 static driver_t ice_iflib_driver = {
198 	.name = "ice",
199 	.methods = ice_iflib_methods,
200 	.size = sizeof(struct ice_softc),
201 };
202 
/*
 * Tx/Rx method tables defined outside this file. ice_setup_scctx() installs
 * ice_recovery_txrx when the driver is in recovery mode and ice_txrx
 * otherwise.
 */
extern struct if_txrx ice_txrx;
extern struct if_txrx ice_recovery_txrx;
205 
206 /**
207  * @var ice_sctx
208  * @brief ice driver shared context
209  *
210  * Structure defining shared values (context) that is used by all instances of
211  * the device. Primarily used to setup details about how the iflib stack
212  * should treat this driver. Also defines the default, minimum, and maximum
213  * number of descriptors in each ring.
214  */
215 static struct if_shared_ctx ice_sctx = {
216 	.isc_magic = IFLIB_MAGIC,
217 	.isc_q_align = PAGE_SIZE,
218 
219 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
220 	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
221 	 * that doesn't make sense since that would be larger than the maximum
222 	 * size of a single packet.
223 	 */
224 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
225 
226 	/* XXX: This is only used by iflib to ensure that
227 	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
228 	 */
229 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
230 	/* XXX: This is used by iflib to set the number of segments in the TSO
231 	 * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the
232 	 * related ifnet parameter.
233 	 */
234 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
235 
236 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
237 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
238 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
239 
240 	.isc_nfl = 1,
241 	.isc_ntxqs = 1,
242 	.isc_nrxqs = 1,
243 
244 	.isc_admin_intrcnt = 1,
245 	.isc_vendor_info = ice_vendor_info_array,
246 	.isc_driver_version = __DECONST(char *, ice_driver_version),
247 	.isc_driver = &ice_iflib_driver,
248 
249 	/*
250 	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
251 	 * for hardware checksum offload
252 	 *
253 	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
254 	 * IP sum field, required by our hardware to calculate valid TSO
255 	 * checksums.
256 	 *
257 	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
258 	 * even when the interface is down.
259 	 *
260 	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
261 	 * vectors manually instead of relying on iflib code to do this.
262 	 */
263 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
264 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,
265 
266 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
267 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
268 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
269 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
270 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
271 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
272 };
273 
/*
 * Register the "ice" driver (ice_driver) on the pci bus, with
 * ice_module_event_handler as the module event handler.
 */
DRIVER_MODULE(ice, pci, ice_driver, ice_module_event_handler, NULL);

/* Module version and the modules this driver depends on */
MODULE_VERSION(ice, 1);
MODULE_DEPEND(ice, pci, 1, 1, 1);
MODULE_DEPEND(ice, ether, 1, 1, 1);
MODULE_DEPEND(ice, iflib, 1, 1, 1);

/*
 * Export PNP information built from ice_vendor_info_array.
 * NOTE(review): presumably consumed for automatic module matching; confirm.
 */
IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);
282 
283 /* Static driver-wide sysctls */
284 #include "ice_iflib_sysctls.h"
285 
286 /**
287  * ice_pci_mapping - Map PCI BAR memory
288  * @sc: device private softc
289  *
290  * Map PCI BAR 0 for device operation.
291  */
292 static int
293 ice_pci_mapping(struct ice_softc *sc)
294 {
295 	int rc;
296 
297 	/* Map BAR0 */
298 	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
299 	if (rc)
300 		return rc;
301 
302 	return 0;
303 }
304 
305 /**
306  * ice_free_pci_mapping - Release PCI BAR memory
307  * @sc: device private softc
308  *
309  * Release PCI BARs which were previously mapped by ice_pci_mapping().
310  */
311 static void
312 ice_free_pci_mapping(struct ice_softc *sc)
313 {
314 	/* Free BAR0 */
315 	ice_free_bar(sc->dev, &sc->bar0);
316 }
317 
318 /*
319  * Device methods
320  */
321 
322 /**
323  * ice_register - register device method callback
324  * @dev: the device being registered
325  *
326  * Returns a pointer to the shared context structure, which is used by iflib.
327  */
328 static void *
329 ice_register(device_t dev __unused)
330 {
331 	return &ice_sctx;
332 } /* ice_register */
333 
334 /**
335  * ice_setup_scctx - Setup the iflib softc context structure
336  * @sc: the device private structure
337  *
338  * Setup the parameters in if_softc_ctx_t structure used by the iflib stack
339  * when loading.
340  */
341 static void
342 ice_setup_scctx(struct ice_softc *sc)
343 {
344 	if_softc_ctx_t scctx = sc->scctx;
345 	struct ice_hw *hw = &sc->hw;
346 	device_t dev = sc->dev;
347 	bool safe_mode, recovery_mode;
348 
349 	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
350 	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);
351 
352 	/*
353 	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
354 	 * a single queue pair.
355 	 */
356 	if (safe_mode || recovery_mode) {
357 		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
358 		scctx->isc_ntxqsets_max = 1;
359 		scctx->isc_nrxqsets_max = 1;
360 	} else {
361 		/*
362 		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
363 		 * the values of the override sysctls. Cache these initial
364 		 * values so that the driver can be aware of what the iflib
365 		 * sysctl value is when setting up MSI-X vectors.
366 		 */
367 		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
368 		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;
369 
370 		if (scctx->isc_ntxqsets == 0)
371 			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
372 		if (scctx->isc_nrxqsets == 0)
373 			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;
374 
375 		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
376 		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;
377 
378 		/*
379 		 * Sanity check that the iflib sysctl values are within the
380 		 * maximum supported range.
381 		 */
382 		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
383 			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
384 		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
385 			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
386 	}
387 
388 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
389 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
390 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
391 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
392 
393 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
394 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
395 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
396 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
397 
398 	scctx->isc_msix_bar = pci_msix_table_bar(dev);
399 	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;
400 
401 	/*
402 	 * If the driver loads in recovery mode, disable Tx/Rx functionality
403 	 */
404 	if (recovery_mode)
405 		scctx->isc_txrx = &ice_recovery_txrx;
406 	else
407 		scctx->isc_txrx = &ice_txrx;
408 
409 	/*
410 	 * If the driver loads in Safe mode or Recovery mode, disable
411 	 * advanced features including hardware offloads.
412 	 */
413 	if (safe_mode || recovery_mode) {
414 		scctx->isc_capenable = ICE_SAFE_CAPS;
415 		scctx->isc_tx_csum_flags = 0;
416 	} else {
417 		scctx->isc_capenable = ICE_FULL_CAPS;
418 		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
419 	}
420 
421 	scctx->isc_capabilities = scctx->isc_capenable;
422 } /* ice_setup_scctx */
423 
424 /**
425  * ice_if_attach_pre - Early device attach logic
426  * @ctx: the iflib context structure
427  *
428  * Called by iflib during the attach process. Earliest main driver entry
429  * point which performs necessary hardware and driver initialization. Called
430  * before the Tx and Rx queues are allocated.
431  */
432 static int
433 ice_if_attach_pre(if_ctx_t ctx)
434 {
435 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
436 	enum ice_fw_modes fw_mode;
437 	enum ice_status status;
438 	if_softc_ctx_t scctx;
439 	struct ice_hw *hw;
440 	device_t dev;
441 	int err;
442 
443 	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");
444 
445 	ice_set_state(&sc->state, ICE_STATE_ATTACHING);
446 
447 	sc->ctx = ctx;
448 	sc->media = iflib_get_media(ctx);
449 	sc->sctx = iflib_get_sctx(ctx);
450 	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);
451 
452 	dev = sc->dev = iflib_get_dev(ctx);
453 	scctx = sc->scctx = iflib_get_softc_ctx(ctx);
454 
455 	hw = &sc->hw;
456 	hw->back = sc;
457 
458 	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
459 		 "%s:admin", device_get_nameunit(dev));
460 	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
461 	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);
462 
463 	ASSERT_CTX_LOCKED(sc);
464 
465 	if (ice_pci_mapping(sc)) {
466 		err = (ENXIO);
467 		goto destroy_admin_timer;
468 	}
469 
470 	/* Save off the PCI information */
471 	ice_save_pci_info(hw, dev);
472 
473 	/* create tunables as early as possible */
474 	ice_add_device_tunables(sc);
475 
476 	/* Setup ControlQ lengths */
477 	ice_set_ctrlq_len(hw);
478 
479 reinit_hw:
480 
481 	fw_mode = ice_get_fw_mode(hw);
482 	if (fw_mode == ICE_FW_MODE_REC) {
483 		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
484 
485 		err = ice_attach_pre_recovery_mode(sc);
486 		if (err)
487 			goto free_pci_mapping;
488 
489 		return (0);
490 	}
491 
492 	/* Initialize the hw data structure */
493 	status = ice_init_hw(hw);
494 	if (status) {
495 		if (status == ICE_ERR_FW_API_VER) {
496 			/* Enter recovery mode, so that the driver remains
497 			 * loaded. This way, if the system administrator
498 			 * cannot update the driver, they may still attempt to
499 			 * downgrade the NVM.
500 			 */
501 			err = ice_attach_pre_recovery_mode(sc);
502 			if (err)
503 				goto free_pci_mapping;
504 
505 			return (0);
506 		} else {
507 			err = EIO;
508 			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
509 				      ice_status_str(status),
510 				      ice_aq_str(hw->adminq.sq_last_status));
511 		}
512 		goto free_pci_mapping;
513 	}
514 
515 	ice_init_device_features(sc);
516 
517 	/* Keep flag set by default */
518 	ice_set_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
519 
520 	/* Notify firmware of the device driver version */
521 	err = ice_send_version(sc);
522 	if (err)
523 		goto deinit_hw;
524 
525 	/*
526 	 * Success indicates a change was made that requires a reinitialization
527 	 * of the hardware
528 	 */
529 	err = ice_load_pkg_file(sc);
530 	if (err == ICE_SUCCESS) {
531 		ice_deinit_hw(hw);
532 		goto reinit_hw;
533 	}
534 
535 	err = ice_init_link_events(sc);
536 	if (err) {
537 		device_printf(dev, "ice_init_link_events failed: %s\n",
538 			      ice_err_str(err));
539 		goto deinit_hw;
540 	}
541 
542 	/* Initialize VLAN mode in FW; if dual VLAN mode is supported by the package
543 	 * and firmware, this will force them to use single VLAN mode.
544 	 */
545 	status = ice_set_vlan_mode(hw);
546 	if (status) {
547 		err = EIO;
548 		device_printf(dev, "Unable to initialize VLAN mode, err %s aq_err %s\n",
549 			      ice_status_str(status),
550 			      ice_aq_str(hw->adminq.sq_last_status));
551 		goto deinit_hw;
552 	}
553 
554 	ice_print_nvm_version(sc);
555 
556 	/* Setup the MAC address */
557 	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);
558 
559 	/* Setup the iflib softc context structure */
560 	ice_setup_scctx(sc);
561 
562 	/* Initialize the Tx queue manager */
563 	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
564 	if (err) {
565 		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
566 			      ice_err_str(err));
567 		goto deinit_hw;
568 	}
569 
570 	/* Initialize the Rx queue manager */
571 	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
572 	if (err) {
573 		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
574 			      ice_err_str(err));
575 		goto free_tx_qmgr;
576 	}
577 
578 	/* Initialize the interrupt resource manager */
579 	err = ice_alloc_intr_tracking(sc);
580 	if (err)
581 		/* Errors are already printed */
582 		goto free_rx_qmgr;
583 
584 	/* Determine maximum number of VSIs we'll prepare for */
585 	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
586 				    hw->func_caps.guar_num_vsi);
587 
588 	if (!sc->num_available_vsi) {
589 		err = EIO;
590 		device_printf(dev, "No VSIs allocated to host\n");
591 		goto free_intr_tracking;
592 	}
593 
594 	/* Allocate storage for the VSI pointers */
595 	sc->all_vsi = (struct ice_vsi **)
596 		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
597 		       M_ICE, M_WAITOK | M_ZERO);
598 	if (!sc->all_vsi) {
599 		err = ENOMEM;
600 		device_printf(dev, "Unable to allocate VSI array\n");
601 		goto free_intr_tracking;
602 	}
603 
604 	/*
605 	 * Prepare the statically allocated primary PF VSI in the softc
606 	 * structure. Other VSIs will be dynamically allocated as needed.
607 	 */
608 	ice_setup_pf_vsi(sc);
609 
610 	err = ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
611 	    scctx->isc_nrxqsets_max);
612 	if (err) {
613 		device_printf(dev, "Unable to allocate VSI Queue maps\n");
614 		goto free_main_vsi;
615 	}
616 
617 	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
618 	err = ice_allocate_msix(sc);
619 	if (err)
620 		goto free_main_vsi;
621 
622 	return 0;
623 
624 free_main_vsi:
625 	/* ice_release_vsi will free the queue maps if they were allocated */
626 	ice_release_vsi(&sc->pf_vsi);
627 	free(sc->all_vsi, M_ICE);
628 	sc->all_vsi = NULL;
629 free_intr_tracking:
630 	ice_free_intr_tracking(sc);
631 free_rx_qmgr:
632 	ice_resmgr_destroy(&sc->rx_qmgr);
633 free_tx_qmgr:
634 	ice_resmgr_destroy(&sc->tx_qmgr);
635 deinit_hw:
636 	ice_deinit_hw(hw);
637 free_pci_mapping:
638 	ice_free_pci_mapping(sc);
639 destroy_admin_timer:
640 	mtx_lock(&sc->admin_mtx);
641 	callout_stop(&sc->admin_timer);
642 	mtx_unlock(&sc->admin_mtx);
643 	mtx_destroy(&sc->admin_mtx);
644 	return err;
645 } /* ice_if_attach_pre */
646 
647 /**
648  * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
649  * @sc: the device private softc
650  *
651  * Loads the device driver in limited Firmware Recovery mode, intended to
652  * allow users to update the firmware to attempt to recover the device.
653  *
654  * @remark We may enter recovery mode in case either (a) the firmware is
655  * detected to be in an invalid state and must be re-programmed, or (b) the
656  * driver detects that the loaded firmware has a non-compatible API version
657  * that the driver cannot operate with.
658  */
659 static int
660 ice_attach_pre_recovery_mode(struct ice_softc *sc)
661 {
662 	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);
663 
664 	/* Setup the iflib softc context */
665 	ice_setup_scctx(sc);
666 
667 	/* Setup the PF VSI back pointer */
668 	sc->pf_vsi.sc = sc;
669 
670 	/*
671 	 * We still need to allocate MSI-X vectors since we need one vector to
672 	 * run the administrative admin interrupt
673 	 */
674 	return ice_allocate_msix(sc);
675 }
676 
677 /**
678  * ice_update_link_status - notify OS of link state change
679  * @sc: device private softc structure
680  * @update_media: true if we should update media even if link didn't change
681  *
682  * Called to notify iflib core of link status changes. Should be called once
683  * during attach_post, and whenever link status changes during runtime.
684  *
685  * This call only updates the currently supported media types if the link
686  * status changed, or if update_media is set to true.
687  */
688 static void
689 ice_update_link_status(struct ice_softc *sc, bool update_media)
690 {
691 	struct ice_hw *hw = &sc->hw;
692 	enum ice_status status;
693 
694 	/* Never report link up when in recovery mode */
695 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
696 		return;
697 
698 	/* Report link status to iflib only once each time it changes */
699 	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
700 		if (sc->link_up) { /* link is up */
701 			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);
702 
703 			if (!(hw->port_info->phy.link_info_old.link_info & ICE_AQ_LINK_UP))
704 				ice_set_default_local_lldp_mib(sc);
705 
706 			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
707 			ice_rdma_link_change(sc, LINK_STATE_UP, baudrate);
708 
709 			ice_link_up_msg(sc);
710 		} else { /* link is down */
711 			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
712 			ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
713 		}
714 		update_media = true;
715 	}
716 
717 	/* Update the supported media types */
718 	if (update_media && !ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
719 		status = ice_add_media_types(sc, sc->media);
720 		if (status)
721 			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
722 				      ice_status_str(status),
723 				      ice_aq_str(hw->adminq.sq_last_status));
724 	}
725 }
726 
727 /**
728  * ice_if_attach_post - Late device attach logic
729  * @ctx: the iflib context structure
730  *
731  * Called by iflib to finish up attaching the device. Performs any attach
732  * logic which must wait until after the Tx and Rx queues have been
733  * allocated.
734  */
735 static int
736 ice_if_attach_post(if_ctx_t ctx)
737 {
738 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
739 	if_t ifp = iflib_get_ifp(ctx);
740 	enum ice_status status;
741 	int err;
742 
743 	ASSERT_CTX_LOCKED(sc);
744 
745 	/* We don't yet support loading if MSI-X is not supported */
746 	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
747 		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
748 		return (ENOTSUP);
749 	}
750 
751 	/* The ifnet structure hasn't yet been initialized when the attach_pre
752 	 * handler is called, so wait until attach_post to setup the
753 	 * isc_max_frame_size.
754 	 */
755 
756 	sc->ifp = ifp;
757 	sc->scctx->isc_max_frame_size = if_getmtu(ifp) +
758 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
759 
760 	/*
761 	 * If we are in recovery mode, only perform a limited subset of
762 	 * initialization to support NVM recovery.
763 	 */
764 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
765 		ice_attach_post_recovery_mode(sc);
766 		return (0);
767 	}
768 
769 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
770 
771 	err = ice_initialize_vsi(&sc->pf_vsi);
772 	if (err) {
773 		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
774 			      ice_err_str(err));
775 		return err;
776 	}
777 
778 	/* Enable FW health event reporting */
779 	ice_init_health_events(sc);
780 
781 	/* Configure the main PF VSI for RSS */
782 	err = ice_config_rss(&sc->pf_vsi);
783 	if (err) {
784 		device_printf(sc->dev,
785 			      "Unable to configure RSS for the main VSI, err %s\n",
786 			      ice_err_str(err));
787 		return err;
788 	}
789 
790 	/* Configure switch to drop transmitted LLDP and PAUSE frames */
791 	err = ice_cfg_pf_ethertype_filters(sc);
792 	if (err)
793 		return err;
794 
795 	ice_get_and_print_bus_info(sc);
796 
797 	ice_set_link_management_mode(sc);
798 
799 	ice_init_saved_phy_cfg(sc);
800 
801 	ice_cfg_pba_num(sc);
802 
803 	/* Set a default value for PFC mode on attach since the FW state is unknown
804 	 * before sysctl tunables are executed and it can't be queried. This fixes an
805 	 * issue when loading the driver with the FW LLDP agent enabled but the FW
806 	 * was previously in DSCP PFC mode.
807 	 */
808 	status = ice_aq_set_pfc_mode(&sc->hw, ICE_AQC_PFC_VLAN_BASED_PFC, NULL);
809 	if (status != ICE_SUCCESS)
810 		device_printf(sc->dev, "Setting pfc mode failed, status %s\n", ice_status_str(status));
811 
812 	ice_add_device_sysctls(sc);
813 
814 	/* Get DCBX/LLDP state and start DCBX agent */
815 	ice_init_dcb_setup(sc);
816 
817 	/* Setup link configuration parameters */
818 	ice_init_link_configuration(sc);
819 	ice_update_link_status(sc, true);
820 
821 	/* Configure interrupt causes for the administrative interrupt */
822 	ice_configure_misc_interrupts(sc);
823 
824 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
825 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
826 
827 	err = ice_rdma_pf_attach(sc);
828 	if (err)
829 		return (err);
830 
831 	/* Start the admin timer */
832 	mtx_lock(&sc->admin_mtx);
833 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
834 	mtx_unlock(&sc->admin_mtx);
835 
836 	if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
837 		 !ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
838 		ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
839 
840 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
841 
842 	return 0;
843 } /* ice_if_attach_post */
844 
845 /**
846  * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
847  * @sc: the device private softc
848  *
849  * Performs minimal work to prepare the driver to recover an NVM in case the
850  * firmware is in recovery mode.
851  */
852 static void
853 ice_attach_post_recovery_mode(struct ice_softc *sc)
854 {
855 	/* Configure interrupt causes for the administrative interrupt */
856 	ice_configure_misc_interrupts(sc);
857 
858 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
859 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
860 
861 	/* Start the admin timer */
862 	mtx_lock(&sc->admin_mtx);
863 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
864 	mtx_unlock(&sc->admin_mtx);
865 
866 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
867 }
868 
869 /**
870  * ice_free_irqvs - Free IRQ vector memory
871  * @sc: the device private softc structure
872  *
873  * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
874  */
875 static void
876 ice_free_irqvs(struct ice_softc *sc)
877 {
878 	struct ice_vsi *vsi = &sc->pf_vsi;
879 	if_ctx_t ctx = sc->ctx;
880 	int i;
881 
882 	/* If the irqvs array is NULL, then there are no vectors to free */
883 	if (sc->irqvs == NULL)
884 		return;
885 
886 	/* Free the IRQ vectors */
887 	for (i = 0; i < sc->num_irq_vectors; i++)
888 		iflib_irq_free(ctx, &sc->irqvs[i].irq);
889 
890 	/* Clear the irqv pointers */
891 	for (i = 0; i < vsi->num_rx_queues; i++)
892 		vsi->rx_queues[i].irqv = NULL;
893 
894 	for (i = 0; i < vsi->num_tx_queues; i++)
895 		vsi->tx_queues[i].irqv = NULL;
896 
897 	/* Release the vector array memory */
898 	free(sc->irqvs, M_ICE);
899 	sc->irqvs = NULL;
900 	sc->num_irq_vectors = 0;
901 }
902 
903 /**
904  * ice_if_detach - Device driver detach logic
905  * @ctx: iflib context structure
906  *
907  * Perform device shutdown logic to detach the device driver.
908  *
909  * Note that there is no guarantee of the ordering of ice_if_queues_free() and
910  * ice_if_detach(). It is possible for the functions to be called in either
911  * order, and they must not assume to have a strict ordering.
912  */
913 static int
914 ice_if_detach(if_ctx_t ctx)
915 {
916 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
917 	struct ice_vsi *vsi = &sc->pf_vsi;
918 	enum ice_status status;
919 	int i;
920 
921 	ASSERT_CTX_LOCKED(sc);
922 
923 	/* Indicate that we're detaching */
924 	ice_set_state(&sc->state, ICE_STATE_DETACHING);
925 
926 	/* Stop the admin timer */
927 	mtx_lock(&sc->admin_mtx);
928 	callout_stop(&sc->admin_timer);
929 	mtx_unlock(&sc->admin_mtx);
930 	mtx_destroy(&sc->admin_mtx);
931 
932 	ice_rdma_pf_detach(sc);
933 
934 	/* Free allocated media types */
935 	ifmedia_removeall(sc->media);
936 
937 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
938 	 * pointers. Note, the calls here and those in ice_if_queues_free()
939 	 * are *BOTH* necessary, as we cannot guarantee which path will be
940 	 * run first
941 	 */
942 	ice_vsi_del_txqs_ctx(vsi);
943 	ice_vsi_del_rxqs_ctx(vsi);
944 
945 	/* Release MSI-X resources */
946 	ice_free_irqvs(sc);
947 
948 	for (i = 0; i < sc->num_available_vsi; i++) {
949 		if (sc->all_vsi[i])
950 			ice_release_vsi(sc->all_vsi[i]);
951 	}
952 
953 	if (sc->all_vsi) {
954 		free(sc->all_vsi, M_ICE);
955 		sc->all_vsi = NULL;
956 	}
957 
958 	/* Release MSI-X memory */
959 	pci_release_msi(sc->dev);
960 
961 	if (sc->msix_table != NULL) {
962 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
963 				     rman_get_rid(sc->msix_table),
964 				     sc->msix_table);
965 		sc->msix_table = NULL;
966 	}
967 
968 	ice_free_intr_tracking(sc);
969 
970 	/* Destroy the queue managers */
971 	ice_resmgr_destroy(&sc->tx_qmgr);
972 	ice_resmgr_destroy(&sc->rx_qmgr);
973 
974 	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
975 		ice_deinit_hw(&sc->hw);
976 
977 	IFLIB_CTX_UNLOCK(sc);
978 	status = ice_reset(&sc->hw, ICE_RESET_PFR);
979 	IFLIB_CTX_LOCK(sc);
980 	if (status) {
981 		device_printf(sc->dev, "device PF reset failed, err %s\n",
982 			      ice_status_str(status));
983 	}
984 
985 	ice_free_pci_mapping(sc);
986 
987 	return 0;
988 } /* ice_if_detach */
989 
990 /**
991  * ice_if_tx_queues_alloc - Allocate Tx queue memory
992  * @ctx: iflib context structure
993  * @vaddrs: virtual addresses for the queue memory
994  * @paddrs: physical addresses for the queue memory
995  * @ntxqs: the number of Tx queues per set (should always be 1)
996  * @ntxqsets: the number of Tx queue sets to allocate
997  *
998  * Called by iflib to allocate Tx queues for the device. Allocates driver
999  * memory to track each queue, the status arrays used for descriptor
1000  * status reporting, and Tx queue sysctls.
1001  */
1002 static int
1003 ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1004 		       int __invariant_only ntxqs, int ntxqsets)
1005 {
1006 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1007 	struct ice_vsi *vsi = &sc->pf_vsi;
1008 	struct ice_tx_queue *txq;
1009 	int err, i, j;
1010 
1011 	MPASS(ntxqs == 1);
1012 	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
1013 	ASSERT_CTX_LOCKED(sc);
1014 
1015 	/* Do not bother allocating queues if we're in recovery mode */
1016 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1017 		return (0);
1018 
1019 	/* Allocate queue structure memory */
1020 	if (!(vsi->tx_queues =
1021 	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1022 		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
1023 		return (ENOMEM);
1024 	}
1025 
1026 	/* Allocate report status arrays */
1027 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1028 		if (!(txq->tx_rsq =
1029 		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
1030 			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
1031 			err = ENOMEM;
1032 			goto free_tx_queues;
1033 		}
1034 		/* Initialize report status array */
1035 		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
1036 			txq->tx_rsq[j] = QIDX_INVALID;
1037 	}
1038 
1039 	/* Assign queues from PF space to the main VSI */
1040 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
1041 	if (err) {
1042 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1043 			      ice_err_str(err));
1044 		goto free_tx_queues;
1045 	}
1046 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1047 
1048 	/* Add Tx queue sysctls context */
1049 	ice_vsi_add_txqs_ctx(vsi);
1050 
1051 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1052 		/* q_handle == me when only one TC */
1053 		txq->me = txq->q_handle = i;
1054 		txq->vsi = vsi;
1055 
1056 		/* store the queue size for easier access */
1057 		txq->desc_count = sc->scctx->isc_ntxd[0];
1058 
1059 		/* get the virtual and physical address of the hardware queues */
1060 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
1061 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
1062 		txq->tx_paddr = paddrs[i];
1063 
1064 		ice_add_txq_sysctls(txq);
1065 	}
1066 
1067 	vsi->num_tx_queues = ntxqsets;
1068 
1069 	return (0);
1070 
1071 free_tx_queues:
1072 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1073 		if (txq->tx_rsq != NULL) {
1074 			free(txq->tx_rsq, M_ICE);
1075 			txq->tx_rsq = NULL;
1076 		}
1077 	}
1078 	free(vsi->tx_queues, M_ICE);
1079 	vsi->tx_queues = NULL;
1080 	return err;
1081 }
1082 
1083 /**
1084  * ice_if_rx_queues_alloc - Allocate Rx queue memory
1085  * @ctx: iflib context structure
1086  * @vaddrs: virtual addresses for the queue memory
1087  * @paddrs: physical addresses for the queue memory
1088  * @nrxqs: number of Rx queues per set (should always be 1)
1089  * @nrxqsets: number of Rx queue sets to allocate
1090  *
1091  * Called by iflib to allocate Rx queues for the device. Allocates driver
1092  * memory to track each queue, as well as sets up the Rx queue sysctls.
1093  */
1094 static int
1095 ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1096 		       int __invariant_only nrxqs, int nrxqsets)
1097 {
1098 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1099 	struct ice_vsi *vsi = &sc->pf_vsi;
1100 	struct ice_rx_queue *rxq;
1101 	int err, i;
1102 
1103 	MPASS(nrxqs == 1);
1104 	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
1105 	ASSERT_CTX_LOCKED(sc);
1106 
1107 	/* Do not bother allocating queues if we're in recovery mode */
1108 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1109 		return (0);
1110 
1111 	/* Allocate queue structure memory */
1112 	if (!(vsi->rx_queues =
1113 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1114 		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
1115 		return (ENOMEM);
1116 	}
1117 
1118 	/* Assign queues from PF space to the main VSI */
1119 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
1120 	if (err) {
1121 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1122 			      ice_err_str(err));
1123 		goto free_rx_queues;
1124 	}
1125 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1126 
1127 	/* Add Rx queue sysctls context */
1128 	ice_vsi_add_rxqs_ctx(vsi);
1129 
1130 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
1131 		rxq->me = i;
1132 		rxq->vsi = vsi;
1133 
1134 		/* store the queue size for easier access */
1135 		rxq->desc_count = sc->scctx->isc_nrxd[0];
1136 
1137 		/* get the virtual and physical address of the hardware queues */
1138 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
1139 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
1140 		rxq->rx_paddr = paddrs[i];
1141 
1142 		ice_add_rxq_sysctls(rxq);
1143 	}
1144 
1145 	vsi->num_rx_queues = nrxqsets;
1146 
1147 	return (0);
1148 
1149 free_rx_queues:
1150 	free(vsi->rx_queues, M_ICE);
1151 	vsi->rx_queues = NULL;
1152 	return err;
1153 }
1154 
1155 /**
1156  * ice_if_queues_free - Free queue memory
1157  * @ctx: the iflib context structure
1158  *
1159  * Free queue memory allocated by ice_if_tx_queues_alloc() and
1160  * ice_if_rx_queues_alloc().
1161  *
1162  * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
1163  * called in the same order. It's possible for ice_if_queues_free() to be
1164  * called prior to ice_if_detach(), and vice versa.
1165  *
1166  * For this reason, the main VSI is a static member of the ice_softc, which is
1167  * not free'd until after iflib finishes calling both of these functions.
1168  *
1169  * Thus, care must be taken in how we manage the memory being freed by this
1170  * function, and in what tasks it can and must perform.
1171  */
1172 static void
1173 ice_if_queues_free(if_ctx_t ctx)
1174 {
1175 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1176 	struct ice_vsi *vsi = &sc->pf_vsi;
1177 	struct ice_tx_queue *txq;
1178 	int i;
1179 
1180 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
1181 	 * pointers. Note, the calls here and those in ice_if_detach()
1182 	 * are *BOTH* necessary, as we cannot guarantee which path will be
1183 	 * run first
1184 	 */
1185 	ice_vsi_del_txqs_ctx(vsi);
1186 	ice_vsi_del_rxqs_ctx(vsi);
1187 
1188 	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
1189 	ice_free_irqvs(sc);
1190 
1191 	if (vsi->tx_queues != NULL) {
1192 		/* free the tx_rsq arrays */
1193 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1194 			if (txq->tx_rsq != NULL) {
1195 				free(txq->tx_rsq, M_ICE);
1196 				txq->tx_rsq = NULL;
1197 			}
1198 		}
1199 		free(vsi->tx_queues, M_ICE);
1200 		vsi->tx_queues = NULL;
1201 		vsi->num_tx_queues = 0;
1202 	}
1203 	if (vsi->rx_queues != NULL) {
1204 		free(vsi->rx_queues, M_ICE);
1205 		vsi->rx_queues = NULL;
1206 		vsi->num_rx_queues = 0;
1207 	}
1208 }
1209 
1210 /**
1211  * ice_msix_que - Fast interrupt handler for MSI-X receive queues
1212  * @arg: The Rx queue memory
1213  *
1214  * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
1215  * an MSI-X interrupt for a given queue is triggered. Currently this just asks
1216  * iflib to schedule the main Rx thread.
1217  */
1218 static int
1219 ice_msix_que(void *arg)
1220 {
1221 	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;
1222 
1223 	/* TODO: dynamic ITR algorithm?? */
1224 
1225 	return (FILTER_SCHEDULE_THREAD);
1226 }
1227 
1228 /**
1229  * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
1230  * @arg: pointer to device softc memory
1231  *
1232  * Called by iflib when an administrative interrupt occurs. Should perform any
1233  * fast logic for handling the interrupt cause, and then indicate whether the
1234  * admin task needs to be queued.
1235  */
1236 static int
1237 ice_msix_admin(void *arg)
1238 {
1239 	struct ice_softc *sc = (struct ice_softc *)arg;
1240 	struct ice_hw *hw = &sc->hw;
1241 	device_t dev = sc->dev;
1242 	u32 oicr;
1243 
1244 	/* There is no safe way to modify the enabled miscellaneous causes of
1245 	 * the OICR vector at runtime, as doing so would be prone to race
1246 	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
1247 	 * causes and allow future interrupts to occur. The admin interrupt
1248 	 * vector will not be re-enabled until after we exit this function,
1249 	 * but any delayed tasks must be resilient against possible "late
1250 	 * arrival" interrupts that occur while we're already handling the
1251 	 * task. This is done by using state bits and serializing these
1252 	 * delayed tasks via the admin status task function.
1253 	 */
1254 	oicr = rd32(hw, PFINT_OICR);
1255 
1256 	/* Processing multiple controlq interrupts on a single vector does not
1257 	 * provide an indication of which controlq triggered the interrupt.
1258 	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
1259 	 * registers. However, the INTEVENT bit is not guaranteed to be set as
1260 	 * it gets automatically cleared when the hardware acknowledges the
1261 	 * interrupt.
1262 	 *
1263 	 * This means we don't really have a good indication of whether or
1264 	 * which controlq triggered this interrupt. We'll just notify the
1265 	 * admin task that it should check all the controlqs.
1266 	 */
1267 	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
1268 
1269 	if (oicr & PFINT_OICR_VFLR_M) {
1270 		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
1271 	}
1272 
1273 	if (oicr & PFINT_OICR_MAL_DETECT_M) {
1274 		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
1275 	}
1276 
1277 	if (oicr & PFINT_OICR_GRST_M) {
1278 		u32 reset;
1279 
1280 		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
1281 			GLGEN_RSTAT_RESET_TYPE_S;
1282 
1283 		if (reset == ICE_RESET_CORER)
1284 			sc->soft_stats.corer_count++;
1285 		else if (reset == ICE_RESET_GLOBR)
1286 			sc->soft_stats.globr_count++;
1287 		else
1288 			sc->soft_stats.empr_count++;
1289 
1290 		/* There are a couple of bits at play for handling resets.
1291 		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
1292 		 * indicate that the driver has received an OICR with a reset
1293 		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
1294 		 * happen. Second, we set hw->reset_ongoing to indicate that
1295 		 * the hardware is in reset. We will set this back to false as
1296 		 * soon as the driver has determined that the hardware is out
1297 		 * of reset.
1298 		 *
1299 		 * If the driver wishes to trigger a request, it can set one of
1300 		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
1301 		 * correct type of reset.
1302 		 */
1303 		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
1304 			hw->reset_ongoing = true;
1305 	}
1306 
1307 	if (oicr & PFINT_OICR_ECC_ERR_M) {
1308 		device_printf(dev, "ECC Error detected!\n");
1309 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1310 	}
1311 
1312 	if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M)) {
1313 		if (oicr & PFINT_OICR_HMC_ERR_M)
1314 			/* Log the HMC errors */
1315 			ice_log_hmc_error(hw, dev);
1316 		ice_rdma_notify_pe_intr(sc, oicr);
1317 	}
1318 
1319 	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
1320 		device_printf(dev, "PCI Exception detected!\n");
1321 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1322 	}
1323 
1324 	return (FILTER_SCHEDULE_THREAD);
1325 }
1326 
1327 /**
1328  * ice_allocate_msix - Allocate MSI-X vectors for the interface
1329  * @sc: the device private softc
1330  *
1331  * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
1332  *
1333  * First, determine a suitable total number of vectors based on the number
1334  * of CPUs, RSS buckets, the administrative vector, and other demands such as
1335  * RDMA.
1336  *
1337  * Request the desired amount of vectors, and see how many we obtain. If we
1338  * don't obtain as many as desired, reduce the demands by lowering the number
1339  * of requested queues or reducing the demand from other features such as
1340  * RDMA.
1341  *
1342  * @remark This function is required because the driver sets the
1343  * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
1344  * manually.
1345  *
1346  * @remark This driver will only use MSI-X vectors. If this is not possible,
1347  * neither MSI or legacy interrupts will be tried.
1348  *
1349  * @post on success this function must set the following scctx parameters:
1350  * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
1351  *
1352  * @returns zero on success or an error code on failure.
1353  */
1354 static int
1355 ice_allocate_msix(struct ice_softc *sc)
1356 {
1357 	bool iflib_override_queue_count = false;
1358 	if_softc_ctx_t scctx = sc->scctx;
1359 	device_t dev = sc->dev;
1360 	cpuset_t cpus;
1361 	int bar, queues, vectors, requested;
1362 	int err = 0;
1363 	int rdma;
1364 
1365 	/* Allocate the MSI-X bar */
1366 	bar = scctx->isc_msix_bar;
1367 	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
1368 	if (!sc->msix_table) {
1369 		device_printf(dev, "Unable to map MSI-X table\n");
1370 		return (ENOMEM);
1371 	}
1372 
1373 	/* Check if the iflib queue count sysctls have been set */
1374 	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
1375 		iflib_override_queue_count = true;
1376 
1377 	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
1378 	if (err) {
1379 		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
1380 			      __func__, ice_err_str(err));
1381 		CPU_COPY(&all_cpus, &cpus);
1382 	}
1383 
1384 	/* Attempt to mimic behavior of iflib_msix_init */
1385 	if (iflib_override_queue_count) {
1386 		/*
1387 		 * If the override sysctls have been set, limit the queues to
1388 		 * the number of logical CPUs.
1389 		 */
1390 		queues = mp_ncpus;
1391 	} else {
1392 		/*
1393 		 * Otherwise, limit the queue count to the CPUs associated
1394 		 * with the NUMA node the device is associated with.
1395 		 */
1396 		queues = CPU_COUNT(&cpus);
1397 	}
1398 
1399 	/* Clamp to the number of RSS buckets */
1400 	queues = imin(queues, rss_getnumbuckets());
1401 
1402 	/*
1403 	 * Clamp the number of queue pairs to the minimum of the requested Tx
1404 	 * and Rx queues.
1405 	 */
1406 	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
1407 	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
1408 
1409 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) {
1410 		/*
1411 		 * Choose a number of RDMA vectors based on the number of CPUs
1412 		 * up to a maximum
1413 		 */
1414 		rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX);
1415 
1416 		/* Further limit by the user configurable tunable */
1417 		rdma = min(rdma, ice_rdma_max_msix);
1418 	} else {
1419 		rdma = 0;
1420 	}
1421 
1422 	/*
1423 	 * Determine the number of vectors to request. Note that we also need
1424 	 * to allocate one vector for administrative tasks.
1425 	 */
1426 	requested = rdma + queues + 1;
1427 
1428 	vectors = requested;
1429 
1430 	err = pci_alloc_msix(dev, &vectors);
1431 	if (err) {
1432 		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
1433 			      vectors, ice_err_str(err));
1434 		goto err_free_msix_table;
1435 	}
1436 
1437 	/* If we don't receive enough vectors, reduce demands */
1438 	if (vectors < requested) {
1439 		int diff = requested - vectors;
1440 
1441 		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
1442 			      requested, vectors);
1443 
1444 		/*
1445 		 * The OS didn't grant us the requested number of vectors.
1446 		 * Check to see if we can reduce demands by limiting the
1447 		 * number of vectors allocated to certain features.
1448 		 */
1449 
1450 		if (rdma >= diff) {
1451 			/* Reduce the number of RDMA vectors we reserve */
1452 			rdma -= diff;
1453 			diff = 0;
1454 		} else {
1455 			/* Disable RDMA and reduce the difference */
1456 			ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
1457 			diff -= rdma;
1458 			rdma = 0;
1459 		}
1460 
1461 		/*
1462 		 * If we still have a difference, we need to reduce the number
1463 		 * of queue pairs.
1464 		 *
1465 		 * However, we still need at least one vector for the admin
1466 		 * interrupt and one queue pair.
1467 		 */
1468 		if (queues <= diff) {
1469 			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
1470 			err = (ERANGE);
1471 			goto err_pci_release_msi;
1472 		}
1473 
1474 		queues -= diff;
1475 	}
1476 
1477 	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
1478 	if (rdma)
1479 		device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n",
1480 			      rdma);
1481 	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
1482 		      vectors);
1483 
1484 	scctx->isc_vectors = vectors;
1485 	scctx->isc_nrxqsets = queues;
1486 	scctx->isc_ntxqsets = queues;
1487 	scctx->isc_intr = IFLIB_INTR_MSIX;
1488 
1489 	sc->irdma_vectors = rdma;
1490 
1491 	/* Interrupt allocation tracking isn't required in recovery mode,
1492 	 * since neither RDMA nor VFs are enabled.
1493 	 */
1494 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1495 		return (0);
1496 
1497 	/* Keep track of which interrupt indices are being used for what */
1498 	sc->lan_vectors = vectors - rdma;
1499 	err = ice_resmgr_assign_contiguous(&sc->imgr, sc->pf_imap, sc->lan_vectors);
1500 	if (err) {
1501 		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
1502 			      ice_err_str(err));
1503 		goto err_pci_release_msi;
1504 	}
1505 	err = ice_resmgr_assign_contiguous(&sc->imgr, sc->rdma_imap, rdma);
1506 	if (err) {
1507 		device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n",
1508 			      ice_err_str(err));
1509 		ice_resmgr_release_map(&sc->imgr, sc->pf_imap,
1510 					    sc->lan_vectors);
1511 		goto err_pci_release_msi;
1512 	}
1513 
1514 	return (0);
1515 
1516 err_pci_release_msi:
1517 	pci_release_msi(dev);
1518 err_free_msix_table:
1519 	if (sc->msix_table != NULL) {
1520 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
1521 				rman_get_rid(sc->msix_table),
1522 				sc->msix_table);
1523 		sc->msix_table = NULL;
1524 	}
1525 
1526 	return (err);
1527 }
1528 
1529 /**
1530  * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
1531  * @ctx: the iflib context structure
1532  * @msix: the number of vectors we were assigned
1533  *
1534  * Called by iflib to assign MSI-X vectors to queues. Currently requires that
1535  * we get at least the same number of vectors as we have queues, and that we
1536  * always have the same number of Tx and Rx queues.
1537  *
1538  * Tx queues use a softirq instead of using their own hardware interrupt.
1539  */
1540 static int
1541 ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
1542 {
1543 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1544 	struct ice_vsi *vsi = &sc->pf_vsi;
1545 	int err, i, vector;
1546 
1547 	ASSERT_CTX_LOCKED(sc);
1548 
1549 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
1550 		device_printf(sc->dev,
1551 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
1552 			      vsi->num_tx_queues, vsi->num_rx_queues);
1553 		return (EOPNOTSUPP);
1554 	}
1555 
1556 	if (msix < (vsi->num_rx_queues + 1)) {
1557 		device_printf(sc->dev,
1558 			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
1559 		return (EOPNOTSUPP);
1560 	}
1561 
1562 	/* Save the number of vectors for future use */
1563 	sc->num_irq_vectors = vsi->num_rx_queues + 1;
1564 
1565 	/* Allocate space to store the IRQ vector data */
1566 	if (!(sc->irqvs =
1567 	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
1568 					       M_ICE, M_NOWAIT))) {
1569 		device_printf(sc->dev,
1570 			      "Unable to allocate irqv memory\n");
1571 		return (ENOMEM);
1572 	}
1573 
1574 	/* Administrative interrupt events will use vector 0 */
1575 	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
1576 				      ice_msix_admin, sc, 0, "admin");
1577 	if (err) {
1578 		device_printf(sc->dev,
1579 			      "Failed to register Admin queue handler: %s\n",
1580 			      ice_err_str(err));
1581 		goto free_irqvs;
1582 	}
1583 	sc->irqvs[0].me = 0;
1584 
1585 	/* Do not allocate queue interrupts when in recovery mode */
1586 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1587 		return (0);
1588 
1589 	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
1590 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1591 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1592 		int rid = vector + 1;
1593 		char irq_name[16];
1594 
1595 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
1596 		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
1597 					      IFLIB_INTR_RXTX, ice_msix_que,
1598 					      rxq, rxq->me, irq_name);
1599 		if (err) {
1600 			device_printf(sc->dev,
1601 				      "Failed to allocate q int %d err: %s\n",
1602 				      i, ice_err_str(err));
1603 			vector--;
1604 			i--;
1605 			goto fail;
1606 		}
1607 		sc->irqvs[vector].me = vector;
1608 		rxq->irqv = &sc->irqvs[vector];
1609 
1610 		bzero(irq_name, sizeof(irq_name));
1611 
1612 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
1613 		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
1614 					    IFLIB_INTR_TX, txq,
1615 					    txq->me, irq_name);
1616 		txq->irqv = &sc->irqvs[vector];
1617 	}
1618 
1619 	return (0);
1620 fail:
1621 	for (; i >= 0; i--, vector--)
1622 		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
1623 	iflib_irq_free(ctx, &sc->irqvs[0].irq);
1624 free_irqvs:
1625 	free(sc->irqvs, M_ICE);
1626 	sc->irqvs = NULL;
1627 	return err;
1628 }
1629 
1630 /**
1631  * ice_if_mtu_set - Set the device MTU
1632  * @ctx: iflib context structure
1633  * @mtu: the MTU requested
1634  *
1635  * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
1636  *
1637  * @pre assumes the caller holds the iflib CTX lock
1638  */
1639 static int
1640 ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
1641 {
1642 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1643 
1644 	ASSERT_CTX_LOCKED(sc);
1645 
1646 	/* Do not support configuration when in recovery mode */
1647 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1648 		return (ENOSYS);
1649 
1650 	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
1651 		return (EINVAL);
1652 
1653 	sc->scctx->isc_max_frame_size = mtu +
1654 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1655 
1656 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
1657 
1658 	return (0);
1659 }
1660 
1661 /**
1662  * ice_if_intr_enable - Enable device interrupts
1663  * @ctx: iflib context structure
1664  *
1665  * Called by iflib to request enabling device interrupts.
1666  */
1667 static void
1668 ice_if_intr_enable(if_ctx_t ctx)
1669 {
1670 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1671 	struct ice_vsi *vsi = &sc->pf_vsi;
1672 	struct ice_hw *hw = &sc->hw;
1673 
1674 	ASSERT_CTX_LOCKED(sc);
1675 
1676 	/* Enable ITR 0 */
1677 	ice_enable_intr(hw, sc->irqvs[0].me);
1678 
1679 	/* Do not enable queue interrupts in recovery mode */
1680 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1681 		return;
1682 
1683 	/* Enable all queue interrupts */
1684 	for (int i = 0; i < vsi->num_rx_queues; i++)
1685 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
1686 }
1687 
1688 /**
1689  * ice_if_intr_disable - Disable device interrupts
1690  * @ctx: iflib context structure
1691  *
1692  * Called by iflib to request disabling device interrupts.
1693  */
1694 static void
1695 ice_if_intr_disable(if_ctx_t ctx)
1696 {
1697 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1698 	struct ice_hw *hw = &sc->hw;
1699 	unsigned int i;
1700 
1701 	ASSERT_CTX_LOCKED(sc);
1702 
1703 	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
1704 	 * assigned to queues. Instead of assuming that the interrupt
1705 	 * assignment in the rx_queues structure is valid, just disable all
1706 	 * possible interrupts
1707 	 *
1708 	 * Note that we choose not to disable ITR 0 because this handles the
1709 	 * AdminQ interrupts, and we want to keep processing these even when
1710 	 * the interface is offline.
1711 	 */
1712 	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1713 		ice_disable_intr(hw, i);
1714 }
1715 
1716 /**
1717  * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1718  * @ctx: iflib context structure
1719  * @rxqid: the Rx queue to enable
1720  *
1721  * Enable a specific Rx queue interrupt.
1722  *
1723  * This function is not protected by the iflib CTX lock.
1724  */
1725 static int
1726 ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1727 {
1728 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1729 	struct ice_vsi *vsi = &sc->pf_vsi;
1730 	struct ice_hw *hw = &sc->hw;
1731 
1732 	/* Do not enable queue interrupts in recovery mode */
1733 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1734 		return (ENOSYS);
1735 
1736 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1737 	return (0);
1738 }
1739 
1740 /**
1741  * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1742  * @ctx: iflib context structure
1743  * @txqid: the Tx queue to enable
1744  *
1745  * Enable a specific Tx queue interrupt.
1746  *
1747  * This function is not protected by the iflib CTX lock.
1748  */
1749 static int
1750 ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1751 {
1752 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1753 	struct ice_vsi *vsi = &sc->pf_vsi;
1754 	struct ice_hw *hw = &sc->hw;
1755 
1756 	/* Do not enable queue interrupts in recovery mode */
1757 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1758 		return (ENOSYS);
1759 
1760 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1761 	return (0);
1762 }
1763 
1764 /**
1765  * ice_set_default_promisc_mask - Set default config for promisc settings
1766  * @promisc_mask: bitmask to setup
1767  *
1768  * The ice_(set|clear)_vsi_promisc() function expects a mask of promiscuous
1769  * modes to operate on. The mask used in here is the default one for the
1770  * driver, where promiscuous is enabled/disabled for all types of
1771  * non-VLAN-tagged/VLAN 0 traffic.
1772  */
1773 static void
1774 ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask)
1775 {
1776 	ice_zero_bitmap(promisc_mask, ICE_PROMISC_MAX);
1777 	ice_set_bit(ICE_PROMISC_UCAST_TX, promisc_mask);
1778 	ice_set_bit(ICE_PROMISC_UCAST_RX, promisc_mask);
1779 	ice_set_bit(ICE_PROMISC_MCAST_TX, promisc_mask);
1780 	ice_set_bit(ICE_PROMISC_MCAST_RX, promisc_mask);
1781 }
1782 
1783 /**
1784  * ice_if_promisc_set - Set device promiscuous mode
1785  * @ctx: iflib context structure
1786  * @flags: promiscuous flags to configure
1787  *
1788  * Called by iflib to configure device promiscuous mode.
1789  *
1790  * @remark Calls to this function will always overwrite the previous setting
1791  */
1792 static int
1793 ice_if_promisc_set(if_ctx_t ctx, int flags)
1794 {
1795 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1796 	struct ice_hw *hw = &sc->hw;
1797 	device_t dev = sc->dev;
1798 	enum ice_status status;
1799 	bool promisc_enable = flags & IFF_PROMISC;
1800 	bool multi_enable = flags & IFF_ALLMULTI;
1801 	ice_declare_bitmap(promisc_mask, ICE_PROMISC_MAX);
1802 
1803 	/* Do not support configuration when in recovery mode */
1804 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1805 		return (ENOSYS);
1806 
1807 	ice_set_default_promisc_mask(promisc_mask);
1808 
1809 	if (multi_enable)
1810 		return (EOPNOTSUPP);
1811 
1812 	if (promisc_enable) {
1813 		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1814 					     promisc_mask, 0);
1815 		if (status && status != ICE_ERR_ALREADY_EXISTS) {
1816 			device_printf(dev,
1817 				      "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n",
1818 				      ice_status_str(status),
1819 				      ice_aq_str(hw->adminq.sq_last_status));
1820 			return (EIO);
1821 		}
1822 	} else {
1823 		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
1824 					       promisc_mask, 0);
1825 		if (status) {
1826 			device_printf(dev,
1827 				      "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n",
1828 				      ice_status_str(status),
1829 				      ice_aq_str(hw->adminq.sq_last_status));
1830 			return (EIO);
1831 		}
1832 	}
1833 
1834 	return (0);
1835 }
1836 
1837 /**
1838  * ice_if_media_change - Change device media
1839  * @ctx: device ctx structure
1840  *
1841  * Called by iflib when a media change is requested. This operation is not
1842  * supported by the hardware, so we just return an error code.
1843  */
1844 static int
1845 ice_if_media_change(if_ctx_t ctx)
1846 {
1847 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1848 
1849 	device_printf(sc->dev, "Media change is not supported.\n");
1850 	return (ENODEV);
1851 }
1852 
1853 /**
1854  * ice_if_media_status - Report current device media
1855  * @ctx: iflib context structure
1856  * @ifmr: ifmedia request structure to update
1857  *
1858  * Updates the provided ifmr with current device media status, including link
1859  * status and media type.
1860  */
1861 static void
1862 ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1863 {
1864 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1865 	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;
1866 
1867 	ifmr->ifm_status = IFM_AVALID;
1868 	ifmr->ifm_active = IFM_ETHER;
1869 
1870 	/* Never report link up or media types when in recovery mode */
1871 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1872 		return;
1873 
1874 	if (!sc->link_up)
1875 		return;
1876 
1877 	ifmr->ifm_status |= IFM_ACTIVE;
1878 	ifmr->ifm_active |= IFM_FDX;
1879 
1880 	if (li->phy_type_low)
1881 		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
1882 	else if (li->phy_type_high)
1883 		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
1884 	else
1885 		ifmr->ifm_active |= IFM_UNKNOWN;
1886 
1887 	/* Report flow control status as well */
1888 	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
1889 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1890 	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
1891 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1892 }
1893 
1894 /**
1895  * ice_init_tx_tracking - Initialize Tx queue software tracking values
1896  * @vsi: the VSI to initialize
1897  *
1898  * Initialize Tx queue software tracking values, including the Report Status
1899  * queue, and related software tracking values.
1900  */
1901 static void
1902 ice_init_tx_tracking(struct ice_vsi *vsi)
1903 {
1904 	struct ice_tx_queue *txq;
1905 	size_t j;
1906 	int i;
1907 
1908 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1909 
1910 		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;
1911 
1912 		/* Initialize the last processed descriptor to be the end of
1913 		 * the ring, rather than the start, so that we avoid an
1914 		 * off-by-one error in ice_ift_txd_credits_update for the
1915 		 * first packet.
1916 		 */
1917 		txq->tx_cidx_processed = txq->desc_count - 1;
1918 
1919 		for (j = 0; j < txq->desc_count; j++)
1920 			txq->tx_rsq[j] = QIDX_INVALID;
1921 	}
1922 }
1923 
1924 /**
1925  * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
1926  * @sc: the device softc
1927  *
1928  * Called to update the Rx queue mbuf_sz parameter for configuring the receive
1929  * buffer sizes when programming hardware.
1930  */
1931 static void
1932 ice_update_rx_mbuf_sz(struct ice_softc *sc)
1933 {
1934 	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
1935 	struct ice_vsi *vsi = &sc->pf_vsi;
1936 
1937 	MPASS(mbuf_sz <= UINT16_MAX);
1938 	vsi->mbuf_sz = mbuf_sz;
1939 }
1940 
1941 /**
1942  * ice_if_init - Initialize the device
1943  * @ctx: iflib ctx structure
1944  *
1945  * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
1946  * device filters and prepares the Tx and Rx engines.
1947  *
1948  * @pre assumes the caller holds the iflib CTX lock
1949  */
1950 static void
1951 ice_if_init(if_ctx_t ctx)
1952 {
1953 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1954 	device_t dev = sc->dev;
1955 	int err;
1956 
1957 	ASSERT_CTX_LOCKED(sc);
1958 
1959 	/*
1960 	 * We've seen an issue with 11.3/12.1 where sideband routines are
1961 	 * called after detach is called.  This would call routines after
1962 	 * if_stop, causing issues with the teardown process.  This has
1963 	 * seemingly been fixed in STABLE snapshots, but it seems like a
1964 	 * good idea to have this guard here regardless.
1965 	 */
1966 	if (ice_driver_is_detaching(sc))
1967 		return;
1968 
1969 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1970 		return;
1971 
1972 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
1973 		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
1974 		return;
1975 	}
1976 
1977 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
1978 		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
1979 		return;
1980 	}
1981 
1982 	ice_update_rx_mbuf_sz(sc);
1983 
1984 	/* Update the MAC address... User might use a LAA */
1985 	err = ice_update_laa_mac(sc);
1986 	if (err) {
1987 		device_printf(dev,
1988 			      "LAA address change failed, err %s\n",
1989 			      ice_err_str(err));
1990 		return;
1991 	}
1992 
1993 	/* Initialize software Tx tracking values */
1994 	ice_init_tx_tracking(&sc->pf_vsi);
1995 
1996 	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
1997 	if (err) {
1998 		device_printf(dev,
1999 			      "Unable to configure the main VSI for Tx: %s\n",
2000 			      ice_err_str(err));
2001 		return;
2002 	}
2003 
2004 	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
2005 	if (err) {
2006 		device_printf(dev,
2007 			      "Unable to configure the main VSI for Rx: %s\n",
2008 			      ice_err_str(err));
2009 		goto err_cleanup_tx;
2010 	}
2011 
2012 	err = ice_control_all_rx_queues(&sc->pf_vsi, true);
2013 	if (err) {
2014 		device_printf(dev,
2015 			      "Unable to enable Rx rings for transmit: %s\n",
2016 			      ice_err_str(err));
2017 		goto err_cleanup_tx;
2018 	}
2019 
2020 	err = ice_cfg_pf_default_mac_filters(sc);
2021 	if (err) {
2022 		device_printf(dev,
2023 			      "Unable to configure default MAC filters: %s\n",
2024 			      ice_err_str(err));
2025 		goto err_stop_rx;
2026 	}
2027 
2028 	/* We use software interrupts for Tx, so we only program the hardware
2029 	 * interrupts for Rx.
2030 	 */
2031 	ice_configure_all_rxq_interrupts(&sc->pf_vsi);
2032 	ice_configure_rx_itr(&sc->pf_vsi);
2033 
2034 	/* Configure promiscuous mode */
2035 	ice_if_promisc_set(ctx, if_getflags(sc->ifp));
2036 
2037 	if (!ice_testandclear_state(&sc->state, ICE_STATE_FIRST_INIT_LINK))
2038 		if (!sc->link_up && ((if_getflags(sc->ifp) & IFF_UP) ||
2039 			 ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN)))
2040 			ice_set_link(sc, true);
2041 
2042 	ice_rdma_pf_init(sc);
2043 
2044 	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
2045 	return;
2046 
2047 err_stop_rx:
2048 	ice_control_all_rx_queues(&sc->pf_vsi, false);
2049 err_cleanup_tx:
2050 	ice_vsi_disable_tx(&sc->pf_vsi);
2051 }
2052 
2053 /**
2054  * ice_poll_for_media_avail - Re-enable link if media is detected
2055  * @sc: device private structure
2056  *
2057  * Intended to be called from the driver's timer function, this function
2058  * sends the Get Link Status AQ command and re-enables HW link if the
2059  * command says that media is available.
2060  *
2061  * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing,
2062  * since media removal events are supposed to be sent to the driver through
2063  * a link status event.
2064  */
2065 static void
2066 ice_poll_for_media_avail(struct ice_softc *sc)
2067 {
2068 	struct ice_hw *hw = &sc->hw;
2069 	struct ice_port_info *pi = hw->port_info;
2070 
2071 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2072 		pi->phy.get_link_info = true;
2073 		ice_get_link_status(pi, &sc->link_up);
2074 
2075 		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2076 			enum ice_status status;
2077 
2078 			/* Re-enable link and re-apply user link settings */
2079 			if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) ||
2080 			    (if_getflags(sc->ifp) & IFF_UP)) {
2081 				ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
2082 
2083 				/* Update the OS about changes in media capability */
2084 				status = ice_add_media_types(sc, sc->media);
2085 				if (status)
2086 					device_printf(sc->dev,
2087 					    "Error adding device media types: %s aq_err %s\n",
2088 					    ice_status_str(status),
2089 					    ice_aq_str(hw->adminq.sq_last_status));
2090 			}
2091 
2092 			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
2093 		}
2094 	}
2095 }
2096 
2097 /**
2098  * ice_if_timer - called by iflib periodically
2099  * @ctx: iflib ctx structure
2100  * @qid: the queue this timer was called for
2101  *
2102  * This callback is triggered by iflib periodically. We use it to update the
2103  * hw statistics.
2104  *
2105  * @remark this function is not protected by the iflib CTX lock.
2106  */
2107 static void
2108 ice_if_timer(if_ctx_t ctx, uint16_t qid)
2109 {
2110 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2111 	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;
2112 
2113 	if (qid != 0)
2114 		return;
2115 
2116 	/* Do not attempt to update stats when in recovery mode */
2117 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2118 		return;
2119 
2120 	/* Update device statistics */
2121 	ice_update_pf_stats(sc);
2122 
2123 	/*
2124 	 * For proper watchdog management, the iflib stack needs to know if
2125 	 * we've been paused during the last interval. Check if the
2126 	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
2127 	 */
2128 	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
2129 		sc->scctx->isc_pause_frames = 1;
2130 
2131 	/* Update the primary VSI stats */
2132 	ice_update_vsi_hw_stats(&sc->pf_vsi);
2133 }
2134 
2135 /**
2136  * ice_admin_timer - called periodically to trigger the admin task
2137  * @arg: callout(9) argument pointing to the device private softc structure
2138  *
2139  * Timer function used as part of a callout(9) timer that will periodically
2140  * trigger the admin task, even when the interface is down.
2141  *
2142  * @remark this function is not called by iflib and is not protected by the
2143  * iflib CTX lock.
2144  *
2145  * @remark because this is a callout function, it cannot sleep and should not
2146  * attempt taking the iflib CTX lock.
2147  */
2148 static void
2149 ice_admin_timer(void *arg)
2150 {
2151 	struct ice_softc *sc = (struct ice_softc *)arg;
2152 
2153 	/*
2154 	 * There is a point where callout routines are no longer
2155 	 * cancelable.  So there exists a window of time where the
2156 	 * driver enters detach() and tries to cancel the callout, but the
2157 	 * callout routine has passed the cancellation point.  The detach()
2158 	 * routine is unaware of this and tries to free resources that the
2159 	 * callout routine needs.  So we check for the detach state flag to
2160 	 * at least shrink the window of opportunity.
2161 	 */
2162 	if (ice_driver_is_detaching(sc))
2163 		return;
2164 
2165 	/* Fire off the admin task */
2166 	iflib_admin_intr_deferred(sc->ctx);
2167 
2168 	/* Reschedule the admin timer */
2169 	callout_schedule(&sc->admin_timer, hz/2);
2170 }
2171 
2172 /**
2173  * ice_transition_recovery_mode - Transition to recovery mode
2174  * @sc: the device private softc
2175  *
2176  * Called when the driver detects that the firmware has entered recovery mode
2177  * at run time.
2178  */
2179 static void
2180 ice_transition_recovery_mode(struct ice_softc *sc)
2181 {
2182 	struct ice_vsi *vsi = &sc->pf_vsi;
2183 	int i;
2184 
2185 	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
2186 
2187 	/* Tell the stack that the link has gone down */
2188 	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
2189 
2190 	/* Request that the device be re-initialized */
2191 	ice_request_stack_reinit(sc);
2192 
2193 	ice_rdma_pf_detach(sc);
2194 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2195 
2196 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2197 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2198 
2199 	ice_vsi_del_txqs_ctx(vsi);
2200 	ice_vsi_del_rxqs_ctx(vsi);
2201 
2202 	for (i = 0; i < sc->num_available_vsi; i++) {
2203 		if (sc->all_vsi[i])
2204 			ice_release_vsi(sc->all_vsi[i]);
2205 	}
2206 	sc->num_available_vsi = 0;
2207 
2208 	if (sc->all_vsi) {
2209 		free(sc->all_vsi, M_ICE);
2210 		sc->all_vsi = NULL;
2211 	}
2212 
2213 	/* Destroy the interrupt manager */
2214 	ice_resmgr_destroy(&sc->imgr);
2215 	/* Destroy the queue managers */
2216 	ice_resmgr_destroy(&sc->tx_qmgr);
2217 	ice_resmgr_destroy(&sc->rx_qmgr);
2218 
2219 	ice_deinit_hw(&sc->hw);
2220 }
2221 
2222 /**
2223  * ice_transition_safe_mode - Transition to safe mode
2224  * @sc: the device private softc
2225  *
2226  * Called when the driver attempts to reload the DDP package during a device
2227  * reset, and the new download fails. If so, we must transition to safe mode
2228  * at run time.
2229  *
2230  * @remark although safe mode normally allocates only a single queue, we can't
2231  * change the number of queues dynamically when using iflib. Due to this, we
2232  * do not attempt to reduce the number of queues.
2233  */
2234 static void
2235 ice_transition_safe_mode(struct ice_softc *sc)
2236 {
2237 	/* Indicate that we are in Safe mode */
2238 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
2239 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
2240 
2241 	ice_rdma_pf_detach(sc);
2242 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2243 
2244 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2245 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2246 
2247 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2248 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
2249 }
2250 
2251 /**
2252  * ice_if_update_admin_status - update admin status
2253  * @ctx: iflib ctx structure
2254  *
2255  * Called by iflib to update the admin status. For our purposes, this means
2256  * check the adminq, and update the link status. It's ultimately triggered by
2257  * our admin interrupt, or by the ice_if_timer periodically.
2258  *
2259  * @pre assumes the caller holds the iflib CTX lock
2260  */
2261 static void
2262 ice_if_update_admin_status(if_ctx_t ctx)
2263 {
2264 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2265 	enum ice_fw_modes fw_mode;
2266 	bool reschedule = false;
2267 	u16 pending = 0;
2268 
2269 	ASSERT_CTX_LOCKED(sc);
2270 
2271 	/* Check if the firmware entered recovery mode at run time */
2272 	fw_mode = ice_get_fw_mode(&sc->hw);
2273 	if (fw_mode == ICE_FW_MODE_REC) {
2274 		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2275 			/* If we just entered recovery mode, log a warning to
2276 			 * the system administrator and deinit driver state
2277 			 * that is no longer functional.
2278 			 */
2279 			ice_transition_recovery_mode(sc);
2280 		}
2281 	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
2282 		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
2283 			/* Rollback mode isn't fatal, but we don't want to
2284 			 * repeatedly post a message about it.
2285 			 */
2286 			ice_print_rollback_msg(&sc->hw);
2287 		}
2288 	}
2289 
2290 	/* Handle global reset events */
2291 	ice_handle_reset_event(sc);
2292 
2293 	/* Handle PF reset requests */
2294 	ice_handle_pf_reset_request(sc);
2295 
2296 	/* Handle MDD events */
2297 	ice_handle_mdd_event(sc);
2298 
2299 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
2300 	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
2301 	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2302 		/*
2303 		 * If we know the control queues are disabled, skip processing
2304 		 * the control queues entirely.
2305 		 */
2306 		;
2307 	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
2308 		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
2309 		if (pending > 0)
2310 			reschedule = true;
2311 
2312 		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
2313 		if (pending > 0)
2314 			reschedule = true;
2315 	}
2316 
2317 	/* Poll for link up */
2318 	ice_poll_for_media_avail(sc);
2319 
2320 	/* Check and update link status */
2321 	ice_update_link_status(sc, false);
2322 
2323 	/*
2324 	 * If there are still messages to process, we need to reschedule
2325 	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
2326 	 * woken up at the next interrupt or timer event.
2327 	 */
2328 	if (reschedule) {
2329 		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
2330 		iflib_admin_intr_deferred(ctx);
2331 	} else {
2332 		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2333 	}
2334 }
2335 
2336 /**
2337  * ice_prepare_for_reset - Prepare device for an impending reset
2338  * @sc: The device private softc
2339  *
2340  * Prepare the driver for an impending reset, shutting down VSIs, clearing the
2341  * scheduler setup, and shutting down controlqs. Uses the
2342  * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
2343  * driver for reset or not.
2344  */
2345 static void
2346 ice_prepare_for_reset(struct ice_softc *sc)
2347 {
2348 	struct ice_hw *hw = &sc->hw;
2349 
2350 	/* If we're already prepared, there's nothing to do */
2351 	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
2352 		return;
2353 
2354 	log(LOG_INFO, "%s: preparing to reset device logic\n", if_name(sc->ifp));
2355 
2356 	/* In recovery mode, hardware is not initialized */
2357 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2358 		return;
2359 
2360 	/* inform the RDMA client */
2361 	ice_rdma_notify_reset(sc);
2362 	/* stop the RDMA client */
2363 	ice_rdma_pf_stop(sc);
2364 
2365 	/* Release the main PF VSI queue mappings */
2366 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2367 				    sc->pf_vsi.num_tx_queues);
2368 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2369 				    sc->pf_vsi.num_rx_queues);
2370 
2371 	ice_clear_hw_tbls(hw);
2372 
2373 	if (hw->port_info)
2374 		ice_sched_cleanup_all(hw);
2375 
2376 	ice_shutdown_all_ctrlq(hw, false);
2377 }
2378 
2379 /**
2380  * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
2381  * @sc: the device softc pointer
2382  *
2383  * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
2384  * mapping after a reset occurred.
2385  */
2386 static int
2387 ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
2388 {
2389 	struct ice_vsi *vsi = &sc->pf_vsi;
2390 	struct ice_tx_queue *txq;
2391 	struct ice_rx_queue *rxq;
2392 	int err, i;
2393 
2394 	/* Re-assign Tx queues from PF space to the main VSI */
2395 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
2396 					    vsi->num_tx_queues);
2397 	if (err) {
2398 		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
2399 			      ice_err_str(err));
2400 		return (err);
2401 	}
2402 
2403 	/* Re-assign Rx queues from PF space to this VSI */
2404 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
2405 					    vsi->num_rx_queues);
2406 	if (err) {
2407 		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
2408 			      ice_err_str(err));
2409 		goto err_release_tx_queues;
2410 	}
2411 
2412 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
2413 
2414 	/* Re-assign Tx queue tail pointers */
2415 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
2416 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
2417 
2418 	/* Re-assign Rx queue tail pointers */
2419 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
2420 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
2421 
2422 	return (0);
2423 
2424 err_release_tx_queues:
2425 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2426 				   sc->pf_vsi.num_tx_queues);
2427 
2428 	return (err);
2429 }
2430 
/*
 * Determine if the iflib context is active: non-zero when IFF_DRV_RUNNING is
 * set in the driver flags of the context's ifnet.
 */
#define CTX_ACTIVE(ctx) \
	(if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
2433 
2434 /**
2435  * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
2436  * @sc: The device private softc
2437  *
2438  * Handle a driver rebuild while in recovery mode. This will only rebuild the
2439  * limited functionality supported while in recovery mode.
2440  */
2441 static void
2442 ice_rebuild_recovery_mode(struct ice_softc *sc)
2443 {
2444 	device_t dev = sc->dev;
2445 
2446 	/* enable PCIe bus master */
2447 	pci_enable_busmaster(dev);
2448 
2449 	/* Configure interrupt causes for the administrative interrupt */
2450 	ice_configure_misc_interrupts(sc);
2451 
2452 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2453 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2454 
2455 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2456 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2457 
2458 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2459 
2460 	/* In order to completely restore device functionality, the iflib core
2461 	 * needs to be reset. We need to request an iflib reset. Additionally,
2462 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2463 	 * the iflib core, we also want re-run the admin task so that iflib
2464 	 * resets immediately instead of waiting for the next interrupt.
2465 	 */
2466 	ice_request_stack_reinit(sc);
2467 
2468 	return;
2469 }
2470 
2471 /**
2472  * ice_rebuild - Rebuild driver state post reset
2473  * @sc: The device private softc
2474  *
2475  * Restore driver state after a reset occurred. Restart the controlqs, setup
2476  * the hardware port, and re-enable the VSIs.
2477  */
2478 static void
2479 ice_rebuild(struct ice_softc *sc)
2480 {
2481 	struct ice_hw *hw = &sc->hw;
2482 	device_t dev = sc->dev;
2483 	enum ice_ddp_state pkg_state;
2484 	enum ice_status status;
2485 	int err;
2486 
2487 	sc->rebuild_ticks = ticks;
2488 
2489 	/* If we're rebuilding, then a reset has succeeded. */
2490 	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);
2491 
2492 	/*
2493 	 * If the firmware is in recovery mode, only restore the limited
2494 	 * functionality supported by recovery mode.
2495 	 */
2496 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2497 		ice_rebuild_recovery_mode(sc);
2498 		return;
2499 	}
2500 
2501 	/* enable PCIe bus master */
2502 	pci_enable_busmaster(dev);
2503 
2504 	status = ice_init_all_ctrlq(hw);
2505 	if (status) {
2506 		device_printf(dev, "failed to re-init controlqs, err %s\n",
2507 			      ice_status_str(status));
2508 		goto err_shutdown_ctrlq;
2509 	}
2510 
2511 	/* Query the allocated resources for Tx scheduler */
2512 	status = ice_sched_query_res_alloc(hw);
2513 	if (status) {
2514 		device_printf(dev,
2515 			      "Failed to query scheduler resources, err %s aq_err %s\n",
2516 			      ice_status_str(status),
2517 			      ice_aq_str(hw->adminq.sq_last_status));
2518 		goto err_shutdown_ctrlq;
2519 	}
2520 
2521 	/* Re-enable FW logging. Keep going even if this fails */
2522 	status = ice_fwlog_set(hw, &hw->fwlog_cfg);
2523 	if (!status) {
2524 		/*
2525 		 * We should have the most updated cached copy of the
2526 		 * configuration, regardless of whether we're rebuilding
2527 		 * or not.  So we'll simply check to see if logging was
2528 		 * enabled pre-rebuild.
2529 		 */
2530 		if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2531 			status = ice_fwlog_register(hw);
2532 			if (status)
2533 				device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n",
2534 				   ice_status_str(status),
2535 				   ice_aq_str(hw->adminq.sq_last_status));
2536 		}
2537 	} else
2538 		device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n",
2539 		   ice_status_str(status),
2540 		   ice_aq_str(hw->adminq.sq_last_status));
2541 
2542 	err = ice_send_version(sc);
2543 	if (err)
2544 		goto err_shutdown_ctrlq;
2545 
2546 	err = ice_init_link_events(sc);
2547 	if (err) {
2548 		device_printf(dev, "ice_init_link_events failed: %s\n",
2549 			      ice_err_str(err));
2550 		goto err_shutdown_ctrlq;
2551 	}
2552 
2553 	status = ice_clear_pf_cfg(hw);
2554 	if (status) {
2555 		device_printf(dev, "failed to clear PF configuration, err %s\n",
2556 			      ice_status_str(status));
2557 		goto err_shutdown_ctrlq;
2558 	}
2559 
2560 	ice_clear_pxe_mode(hw);
2561 
2562 	status = ice_get_caps(hw);
2563 	if (status) {
2564 		device_printf(dev, "failed to get capabilities, err %s\n",
2565 			      ice_status_str(status));
2566 		goto err_shutdown_ctrlq;
2567 	}
2568 
2569 	status = ice_sched_init_port(hw->port_info);
2570 	if (status) {
2571 		device_printf(dev, "failed to initialize port, err %s\n",
2572 			      ice_status_str(status));
2573 		goto err_sched_cleanup;
2574 	}
2575 
2576 	/* If we previously loaded the package, it needs to be reloaded now */
2577 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
2578 		pkg_state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
2579 		if (!ice_is_init_pkg_successful(pkg_state)) {
2580 			ice_log_pkg_init(sc, pkg_state);
2581 			ice_transition_safe_mode(sc);
2582 		}
2583 	}
2584 
2585 	ice_reset_pf_stats(sc);
2586 
2587 	err = ice_rebuild_pf_vsi_qmap(sc);
2588 	if (err) {
2589 		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
2590 			      ice_err_str(err));
2591 		goto err_sched_cleanup;
2592 	}
2593 	err = ice_initialize_vsi(&sc->pf_vsi);
2594 	if (err) {
2595 		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
2596 			      ice_err_str(err));
2597 		goto err_release_queue_allocations;
2598 	}
2599 
2600 	/* Replay all VSI configuration */
2601 	err = ice_replay_all_vsi_cfg(sc);
2602 	if (err)
2603 		goto err_deinit_pf_vsi;
2604 
2605 	/* Re-enable FW health event reporting */
2606 	ice_init_health_events(sc);
2607 
2608 	/* Reconfigure the main PF VSI for RSS */
2609 	err = ice_config_rss(&sc->pf_vsi);
2610 	if (err) {
2611 		device_printf(sc->dev,
2612 			      "Unable to reconfigure RSS for the main VSI, err %s\n",
2613 			      ice_err_str(err));
2614 		goto err_deinit_pf_vsi;
2615 	}
2616 
2617 	if (hw->port_info->qos_cfg.is_sw_lldp)
2618 		ice_add_rx_lldp_filter(sc);
2619 
2620 	/* Refresh link status */
2621 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2622 	sc->hw.port_info->phy.get_link_info = true;
2623 	ice_get_link_status(sc->hw.port_info, &sc->link_up);
2624 	ice_update_link_status(sc, true);
2625 
2626 	/* RDMA interface will be restarted by the stack re-init */
2627 
2628 	/* Configure interrupt causes for the administrative interrupt */
2629 	ice_configure_misc_interrupts(sc);
2630 
2631 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2632 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2633 
2634 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2635 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2636 
2637 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2638 
2639 	/* In order to completely restore device functionality, the iflib core
2640 	 * needs to be reset. We need to request an iflib reset. Additionally,
2641 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2642 	 * the iflib core, we also want re-run the admin task so that iflib
2643 	 * resets immediately instead of waiting for the next interrupt.
2644 	 * If LLDP is enabled we need to reconfig DCB to properly reinit all TC
2645 	 * queues, not only 0. It contains ice_request_stack_reinit as well.
2646 	 */
2647 	if (hw->port_info->qos_cfg.is_sw_lldp)
2648 		ice_request_stack_reinit(sc);
2649 	else
2650 		ice_do_dcb_reconfig(sc, false);
2651 
2652 	return;
2653 
2654 err_deinit_pf_vsi:
2655 	ice_deinit_vsi(&sc->pf_vsi);
2656 err_release_queue_allocations:
2657 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2658 				    sc->pf_vsi.num_tx_queues);
2659 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2660 				    sc->pf_vsi.num_rx_queues);
2661 err_sched_cleanup:
2662 	ice_sched_cleanup_all(hw);
2663 err_shutdown_ctrlq:
2664 	ice_shutdown_all_ctrlq(hw, false);
2665 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2666 	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2667 	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
2668 }
2669 
2670 /**
2671  * ice_handle_reset_event - Handle reset events triggered by OICR
2672  * @sc: The device private softc
2673  *
2674  * Handle reset events triggered by an OICR notification. This includes CORER,
2675  * GLOBR, and EMPR resets triggered by software on this or any other PF or by
2676  * firmware.
2677  *
2678  * @pre assumes the iflib context lock is held, and will unlock it while
2679  * waiting for the hardware to finish reset.
2680  */
2681 static void
2682 ice_handle_reset_event(struct ice_softc *sc)
2683 {
2684 	struct ice_hw *hw = &sc->hw;
2685 	enum ice_status status;
2686 	device_t dev = sc->dev;
2687 
2688 	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
2689 	 * trigger an OICR interrupt. Our OICR handler will determine when
2690 	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
2691 	 * appropriate.
2692 	 */
2693 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
2694 		return;
2695 
2696 	ice_prepare_for_reset(sc);
2697 
2698 	/*
2699 	 * Release the iflib context lock and wait for the device to finish
2700 	 * resetting.
2701 	 */
2702 	IFLIB_CTX_UNLOCK(sc);
2703 	status = ice_check_reset(hw);
2704 	IFLIB_CTX_LOCK(sc);
2705 	if (status) {
2706 		device_printf(dev, "Device never came out of reset, err %s\n",
2707 			      ice_status_str(status));
2708 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2709 		return;
2710 	}
2711 
2712 	/* We're done with the reset, so we can rebuild driver state */
2713 	sc->hw.reset_ongoing = false;
2714 	ice_rebuild(sc);
2715 
2716 	/* In the unlikely event that a PF reset request occurs at the same
2717 	 * time as a global reset, clear the request now. This avoids
2718 	 * resetting a second time right after we reset due to a global event.
2719 	 */
2720 	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2721 		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
2722 }
2723 
2724 /**
2725  * ice_handle_pf_reset_request - Initiate PF reset requested by software
2726  * @sc: The device private softc
2727  *
2728  * Initiate a PF reset requested by software. We handle this in the admin task
2729  * so that only one thread actually handles driver preparation and cleanup,
2730  * rather than having multiple threads possibly attempt to run this code
2731  * simultaneously.
2732  *
2733  * @pre assumes the iflib context lock is held and will unlock it while
2734  * waiting for the PF reset to complete.
2735  */
2736 static void
2737 ice_handle_pf_reset_request(struct ice_softc *sc)
2738 {
2739 	struct ice_hw *hw = &sc->hw;
2740 	enum ice_status status;
2741 
2742 	/* Check for PF reset requests */
2743 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2744 		return;
2745 
2746 	/* Make sure we're prepared for reset */
2747 	ice_prepare_for_reset(sc);
2748 
2749 	/*
2750 	 * Release the iflib context lock and wait for the device to finish
2751 	 * resetting.
2752 	 */
2753 	IFLIB_CTX_UNLOCK(sc);
2754 	status = ice_reset(hw, ICE_RESET_PFR);
2755 	IFLIB_CTX_LOCK(sc);
2756 	if (status) {
2757 		device_printf(sc->dev, "device PF reset failed, err %s\n",
2758 			      ice_status_str(status));
2759 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2760 		return;
2761 	}
2762 
2763 	sc->soft_stats.pfr_count++;
2764 	ice_rebuild(sc);
2765 }
2766 
2767 /**
2768  * ice_init_device_features - Init device driver features
2769  * @sc: driver softc structure
2770  *
2771  * @pre assumes that the function capabilities bits have been set up by
2772  * ice_init_hw().
2773  */
2774 static void
2775 ice_init_device_features(struct ice_softc *sc)
2776 {
2777 	struct ice_hw *hw = &sc->hw;
2778 
2779 	/* Set capabilities that all devices support */
2780 	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2781 	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
2782 	ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2783 	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
2784 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
2785 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
2786 	ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2787 	ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2788 	ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap);
2789 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_cap);
2790 	ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2791 
2792 	/* Disable features due to hardware limitations... */
2793 	if (!hw->func_caps.common_cap.rss_table_size)
2794 		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2795 	if (!hw->func_caps.common_cap.iwarp || !ice_enable_irdma)
2796 		ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2797 	if (!hw->func_caps.common_cap.dcb)
2798 		ice_clear_bit(ICE_FEATURE_DCB, sc->feat_cap);
2799 	/* Disable features due to firmware limitations... */
2800 	if (!ice_is_fw_health_report_supported(hw))
2801 		ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2802 	if (!ice_fwlog_supported(hw))
2803 		ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2804 	if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2805 		if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING))
2806 			ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en);
2807 		else
2808 			ice_fwlog_unregister(hw);
2809 	}
2810 
2811 	/* Disable capabilities not supported by the OS */
2812 	ice_disable_unsupported_features(sc->feat_cap);
2813 
2814 	/* RSS is always enabled for iflib */
2815 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
2816 		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
2817 
2818 	/* Disable features based on sysctl settings */
2819 	if (!ice_tx_balance_en)
2820 		ice_clear_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2821 
2822 	if (hw->dev_caps.supported_sensors & ICE_SENSOR_SUPPORT_E810_INT_TEMP) {
2823 		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_cap);
2824 		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_en);
2825 	}
2826 }
2827 
2828 /**
2829  * ice_if_multi_set - Callback to update Multicast filters in HW
2830  * @ctx: iflib ctx structure
2831  *
2832  * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
2833  * the if_multiaddrs list and determine which filters have been added or
2834  * removed from the list, and update HW programming to reflect the new list.
2835  *
2836  * @pre assumes the caller holds the iflib CTX lock
2837  */
2838 static void
2839 ice_if_multi_set(if_ctx_t ctx)
2840 {
2841 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2842 	int err;
2843 
2844 	ASSERT_CTX_LOCKED(sc);
2845 
2846 	/* Do not handle multicast configuration in recovery mode */
2847 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2848 		return;
2849 
2850 	err = ice_sync_multicast_filters(sc);
2851 	if (err) {
2852 		device_printf(sc->dev,
2853 			      "Failed to synchronize multicast filter list: %s\n",
2854 			      ice_err_str(err));
2855 		return;
2856 	}
2857 }
2858 
2859 /**
2860  * ice_if_vlan_register - Register a VLAN with the hardware
2861  * @ctx: iflib ctx pointer
2862  * @vtag: VLAN to add
2863  *
2864  * Programs the main PF VSI with a hardware filter for the given VLAN.
2865  *
2866  * @pre assumes the caller holds the iflib CTX lock
2867  */
2868 static void
2869 ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
2870 {
2871 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2872 	enum ice_status status;
2873 
2874 	ASSERT_CTX_LOCKED(sc);
2875 
2876 	/* Do not handle VLAN configuration in recovery mode */
2877 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2878 		return;
2879 
2880 	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
2881 	if (status) {
2882 		device_printf(sc->dev,
2883 			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
2884 			      vtag, ice_status_str(status),
2885 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2886 	}
2887 }
2888 
2889 /**
2890  * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
2891  * @ctx: iflib ctx pointer
2892  * @vtag: VLAN to add
2893  *
2894  * Removes the previously programmed VLAN filter from the main PF VSI.
2895  *
2896  * @pre assumes the caller holds the iflib CTX lock
2897  */
2898 static void
2899 ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
2900 {
2901 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2902 	enum ice_status status;
2903 
2904 	ASSERT_CTX_LOCKED(sc);
2905 
2906 	/* Do not handle VLAN configuration in recovery mode */
2907 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2908 		return;
2909 
2910 	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
2911 	if (status) {
2912 		device_printf(sc->dev,
2913 			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
2914 			      vtag, ice_status_str(status),
2915 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2916 	}
2917 }
2918 
2919 /**
2920  * ice_if_stop - Stop the device
2921  * @ctx: iflib context structure
2922  *
2923  * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
2924  * down)
2925  *
2926  * @pre assumes the caller holds the iflib CTX lock
2927  */
2928 static void
2929 ice_if_stop(if_ctx_t ctx)
2930 {
2931 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2932 
2933 	ASSERT_CTX_LOCKED(sc);
2934 
2935 	/*
2936 	 * The iflib core may call IFDI_STOP prior to the first call to
2937 	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
2938 	 * don't have, and disable Tx queues which aren't yet configured.
2939 	 * Although it is likely these extra operations are harmless, they do
2940 	 * cause spurious warning messages to be displayed, which may confuse
2941 	 * users.
2942 	 *
2943 	 * To avoid these messages, we use a state bit indicating if we've
2944 	 * been initialized. It will be set when ice_if_init is called, and
2945 	 * cleared here in ice_if_stop.
2946 	 */
2947 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
2948 		return;
2949 
2950 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
2951 		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
2952 		return;
2953 	}
2954 
2955 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
2956 		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
2957 		return;
2958 	}
2959 
2960 	ice_rdma_pf_stop(sc);
2961 
2962 	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
2963 	 * return of these functions because there's nothing we can really do
2964 	 * if they fail, and the functions already print error messages.
2965 	 * Just try to shut down as much as we can.
2966 	 */
2967 	ice_rm_pf_default_mac_filters(sc);
2968 
2969 	/* Dissociate the Tx and Rx queues from the interrupts */
2970 	ice_flush_txq_interrupts(&sc->pf_vsi);
2971 	ice_flush_rxq_interrupts(&sc->pf_vsi);
2972 
2973 	/* Disable the Tx and Rx queues */
2974 	ice_vsi_disable_tx(&sc->pf_vsi);
2975 	ice_control_all_rx_queues(&sc->pf_vsi, false);
2976 
2977 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
2978 		 !(if_getflags(sc->ifp) & IFF_UP) && sc->link_up)
2979 		ice_set_link(sc, false);
2980 }
2981 
2982 /**
2983  * ice_if_get_counter - Get current value of an ifnet statistic
2984  * @ctx: iflib context pointer
2985  * @counter: ifnet counter to read
2986  *
2987  * Reads the current value of an ifnet counter for the device.
2988  *
2989  * This function is not protected by the iflib CTX lock.
2990  */
2991 static uint64_t
2992 ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
2993 {
2994 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2995 
2996 	/* Return the counter for the main PF VSI */
2997 	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
2998 }
2999 
3000 /**
3001  * ice_request_stack_reinit - Request that iflib re-initialize
3002  * @sc: the device private softc
3003  *
3004  * Request that the device be brought down and up, to re-initialize. For
3005  * example, this may be called when a device reset occurs, or when Tx and Rx
3006  * queues need to be re-initialized.
3007  *
3008  * This is required because the iflib state is outside the driver, and must be
3009  * re-initialized if we need to resart Tx and Rx queues.
3010  */
3011 void
3012 ice_request_stack_reinit(struct ice_softc *sc)
3013 {
3014 	if (CTX_ACTIVE(sc->ctx)) {
3015 		iflib_request_reset(sc->ctx);
3016 		iflib_admin_intr_deferred(sc->ctx);
3017 	}
3018 }
3019 
3020 /**
3021  * ice_driver_is_detaching - Check if the driver is detaching/unloading
3022  * @sc: device private softc
3023  *
3024  * Returns true if the driver is detaching, false otherwise.
3025  *
3026  * @remark on newer kernels, take advantage of iflib_in_detach in order to
3027  * report detachment correctly as early as possible.
3028  *
3029  * @remark this function is used by various code paths that want to avoid
3030  * running if the driver is about to be removed. This includes sysctls and
3031  * other driver access points. Note that it does not fully resolve
3032  * detach-based race conditions as it is possible for a thread to race with
3033  * iflib_in_detach.
3034  */
3035 bool
3036 ice_driver_is_detaching(struct ice_softc *sc)
3037 {
3038 	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
3039 		iflib_in_detach(sc->ctx));
3040 }
3041 
3042 /**
3043  * ice_if_priv_ioctl - Device private ioctl handler
3044  * @ctx: iflib context pointer
3045  * @command: The ioctl command issued
3046  * @data: ioctl specific data
3047  *
3048  * iflib callback for handling custom driver specific ioctls.
3049  *
3050  * @pre Assumes that the iflib context lock is held.
3051  */
3052 static int
3053 ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
3054 {
3055 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3056 	struct ifdrv *ifd;
3057 	device_t dev = sc->dev;
3058 
3059 	if (data == NULL)
3060 		return (EINVAL);
3061 
3062 	ASSERT_CTX_LOCKED(sc);
3063 
3064 	/* Make sure the command type is valid */
3065 	switch (command) {
3066 	case SIOCSDRVSPEC:
3067 	case SIOCGDRVSPEC:
3068 		/* Accepted commands */
3069 		break;
3070 	case SIOCGPRIVATE_0:
3071 		/*
3072 		 * Although we do not support this ioctl command, it's
3073 		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
3074 		 * handler. Do not print a message in this case
3075 		 */
3076 		return (ENOTSUP);
3077 	default:
3078 		/*
3079 		 * If we get a different command for this function, it's
3080 		 * definitely unexpected, so log a message indicating what
3081 		 * command we got for debugging purposes.
3082 		 */
3083 		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
3084 			      __func__, command);
3085 		return (EINVAL);
3086 	}
3087 
3088 	ifd = (struct ifdrv *)data;
3089 
3090 	switch (ifd->ifd_cmd) {
3091 	case ICE_NVM_ACCESS:
3092 		return ice_handle_nvm_access_ioctl(sc, ifd);
3093 	case ICE_DEBUG_DUMP:
3094 		return ice_handle_debug_dump_ioctl(sc, ifd);
3095 	default:
3096 		return EINVAL;
3097 	}
3098 }
3099 
3100 /**
3101  * ice_if_i2c_req - I2C request handler for iflib
3102  * @ctx: iflib context pointer
3103  * @req: The I2C parameters to use
3104  *
3105  * Read from the port's I2C eeprom using the parameters from the ioctl.
3106  *
3107  * @remark The iflib-only part is pretty simple.
3108  */
3109 static int
3110 ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
3111 {
3112 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3113 
3114 	return ice_handle_i2c_req(sc, req);
3115 }
3116 
3117 /**
3118  * ice_if_suspend - PCI device suspend handler for iflib
3119  * @ctx: iflib context pointer
3120  *
3121  * Deinitializes the driver and clears HW resources in preparation for
3122  * suspend or an FLR.
3123  *
3124  * @returns 0; this return value is ignored
3125  */
3126 static int
3127 ice_if_suspend(if_ctx_t ctx)
3128 {
3129 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3130 
3131 	/* At least a PFR is always going to happen after this;
3132 	 * either via FLR or during the D3->D0 transition.
3133 	 */
3134 	ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
3135 
3136 	ice_prepare_for_reset(sc);
3137 
3138 	return (0);
3139 }
3140 
3141 /**
3142  * ice_if_resume - PCI device resume handler for iflib
3143  * @ctx: iflib context pointer
3144  *
3145  * Reinitializes the driver and the HW after PCI resume or after
3146  * an FLR. An init is performed by iflib after this function is finished.
3147  *
3148  * @returns 0; this return value is ignored
3149  */
3150 static int
3151 ice_if_resume(if_ctx_t ctx)
3152 {
3153 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3154 
3155 	ice_rebuild(sc);
3156 
3157 	return (0);
3158 }
3159 
3160 /**
3161  * ice_if_needs_restart - Tell iflib when the driver needs to be reinitialized
3162  * @ctx: iflib context pointer
3163  * @event: event code to check
3164  *
3165  * Defaults to returning false for unknown events.
3166  *
3167  * @returns true if iflib needs to reinit the interface
3168  */
3169 static bool
3170 ice_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event)
3171 {
3172 	switch (event) {
3173 	case IFLIB_RESTART_VLAN_CONFIG:
3174 	default:
3175 		return (false);
3176 	}
3177 }
3178 
3179