xref: /freebsd/sys/dev/ice/if_ice_iflib.c (revision 62cfcf62f627e5093fb37026a6d8c98e4d2ef04c)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2020, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 /*$FreeBSD$*/
32 
33 /**
34  * @file if_ice_iflib.c
35  * @brief iflib driver implementation
36  *
37  * Contains the main entry point for the iflib driver implementation. It
38  * implements the various ifdi driver methods, and sets up the module and
39  * driver values to load an iflib driver.
40  */
41 
42 #include "ice_iflib.h"
43 #include "ice_drv_info.h"
44 #include "ice_switch.h"
45 #include "ice_sched.h"
46 
47 #include <sys/module.h>
48 #include <sys/sockio.h>
49 #include <sys/smp.h>
50 #include <dev/pci/pcivar.h>
51 #include <dev/pci/pcireg.h>
52 
/*
 * Device method prototypes
 *
 * Forward declarations for the callbacks wired into the ice_methods and
 * ice_iflib_methods tables, plus the recovery-mode attach helpers that the
 * attach_pre/attach_post callbacks invoke.
 */

static void *ice_register(device_t);
static int  ice_if_attach_pre(if_ctx_t);
static int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
static int  ice_if_attach_post(if_ctx_t);
static void ice_attach_post_recovery_mode(struct ice_softc *sc);
static int  ice_if_detach(if_ctx_t);
static int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
static int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets);
static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
static void ice_if_queues_free(if_ctx_t ctx);
static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
static void ice_if_intr_enable(if_ctx_t ctx);
static void ice_if_intr_disable(if_ctx_t ctx);
static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
static int ice_if_promisc_set(if_ctx_t ctx, int flags);
static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
static int ice_if_media_change(if_ctx_t ctx);
static void ice_if_init(if_ctx_t ctx);
static void ice_if_timer(if_ctx_t ctx, uint16_t qid);
static void ice_if_update_admin_status(if_ctx_t ctx);
static void ice_if_multi_set(if_ctx_t ctx);
static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
static void ice_if_stop(if_ctx_t ctx);
static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);

/*
 * NOTE(review): presumably the per-queue and administrative MSI-X interrupt
 * handlers; their definitions are not part of the code shown here - confirm.
 */
static int ice_msix_que(void *arg);
static int ice_msix_admin(void *arg);

/*
 * Helper function prototypes
 *
 * File-local helpers; all of them operate on the device softc except
 * ice_init_tx_tracking() (takes a VSI) and ice_admin_timer() (takes an
 * opaque callout argument).
 */
static int ice_pci_mapping(struct ice_softc *sc);
static void ice_free_pci_mapping(struct ice_softc *sc);
static void ice_update_link_status(struct ice_softc *sc, bool update_media);
static void ice_init_device_features(struct ice_softc *sc);
static void ice_init_tx_tracking(struct ice_vsi *vsi);
static void ice_handle_reset_event(struct ice_softc *sc);
static void ice_handle_pf_reset_request(struct ice_softc *sc);
static void ice_prepare_for_reset(struct ice_softc *sc);
static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
static void ice_rebuild(struct ice_softc *sc);
static void ice_rebuild_recovery_mode(struct ice_softc *sc);
static void ice_free_irqvs(struct ice_softc *sc);
static void ice_update_rx_mbuf_sz(struct ice_softc *sc);
static void ice_poll_for_media_avail(struct ice_softc *sc);
static void ice_setup_scctx(struct ice_softc *sc);
static int ice_allocate_msix(struct ice_softc *sc);
static void ice_admin_timer(void *arg);
static void ice_transition_recovery_mode(struct ice_softc *sc);
static void ice_transition_safe_mode(struct ice_softc *sc);
111 
112 /*
113  * Device Interface Declaration
114  */
115 
/**
 * @var ice_methods
 * @brief ice driver method entry points
 *
 * List of device methods implementing the generic device interface used by
 * the device stack to interact with the ice driver. Since this is an iflib
 * driver, most of the methods point to the generic iflib implementation.
 *
 * The only driver-specific entry is device_register, which resolves to
 * ice_register() and hands the shared context (ice_sctx) back to the caller.
 * Every other slot is bound to an iflib_device_* routine.
 */
static device_method_t ice_methods[] = {
	/* Device interface */
	DEVMETHOD(device_register, ice_register),
	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
	DEVMETHOD(device_attach,   iflib_device_attach),
	DEVMETHOD(device_detach,   iflib_device_detach),
	DEVMETHOD(device_shutdown, iflib_device_shutdown),
	DEVMETHOD(device_suspend,  iflib_device_suspend),
	DEVMETHOD(device_resume,   iflib_device_resume),
	DEVMETHOD_END
};
135 
/**
 * @var ice_iflib_methods
 * @brief iflib method entry points
 *
 * List of device methods used by the iflib stack to interact with this
 * driver. These are the real main entry points used to interact with this
 * driver.
 *
 * Each ifdi_* slot is bound to the ice_if_* function of the same name in
 * this file (for example ifdi_attach_pre -> ice_if_attach_pre).
 */
static device_method_t ice_iflib_methods[] = {
	/* Attach / detach lifecycle */
	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
	DEVMETHOD(ifdi_detach, ice_if_detach),
	/* Queue memory and interrupt vector setup / teardown */
	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
	/* Interrupt enable / disable */
	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
	/* Media reporting and selection */
	DEVMETHOD(ifdi_media_status, ice_if_media_status),
	DEVMETHOD(ifdi_media_change, ice_if_media_change),
	/* Interface init / stop and periodic work */
	DEVMETHOD(ifdi_init, ice_if_init),
	DEVMETHOD(ifdi_stop, ice_if_stop),
	DEVMETHOD(ifdi_timer, ice_if_timer),
	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
	/* Multicast and VLAN filtering */
	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
	/* Counters, private ioctls and I2C requests */
	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
	DEVMETHOD_END
};
172 
/**
 * @var ice_driver
 * @brief driver structure for the generic device stack
 *
 * driver_t definition used to setup the generic device methods. It binds the
 * "ice" driver name to the ice_methods table and is the driver passed to
 * DRIVER_MODULE() further down in this file.
 */
static driver_t ice_driver = {
	.name = "ice",
	.methods = ice_methods,
	/* Size of the per-device private structure */
	.size = sizeof(struct ice_softc),
};
184 
/**
 * @var ice_iflib_driver
 * @brief driver structure for the iflib stack
 *
 * driver_t definition used to setup the iflib device methods. It binds the
 * "ice" driver name to the ice_iflib_methods table and is referenced by the
 * isc_driver field of the shared context (ice_sctx).
 */
static driver_t ice_iflib_driver = {
	.name = "ice",
	.methods = ice_iflib_methods,
	/* Same softc size as ice_driver */
	.size = sizeof(struct ice_softc),
};
196 
/*
 * Tx/Rx operation tables defined outside this file. ice_setup_scctx() picks
 * ice_recovery_txrx when the driver is in recovery mode and ice_txrx
 * otherwise.
 */
extern struct if_txrx ice_txrx;
extern struct if_txrx ice_recovery_txrx;
199 
/**
 * @var ice_sctx
 * @brief ice driver shared context
 *
 * Structure defining shared values (context) that is used by all instances of
 * the device. Primarily used to setup details about how the iflib stack
 * should treat this driver. Also defines the default, minimum, and maximum
 * number of descriptors in each ring.
 *
 * A pointer to this structure is what ice_register() returns.
 */
static struct if_shared_ctx ice_sctx = {
	.isc_magic = IFLIB_MAGIC,
	.isc_q_align = PAGE_SIZE,

	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
	 * that doesn't make sense since that would be larger than the maximum
	 * size of a single packet.
	 */
	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,

	/* XXX: This is only used by iflib to ensure that
	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
	 */
	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
	/* XXX: This is used by iflib to set the number of segments in the TSO
	 * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the
	 * related ifnet parameter.
	 */
	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,

	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,

	/*
	 * One Tx queue and one Rx queue per queue set; ice_if_tx_queues_alloc()
	 * asserts that it is always handed ntxqs == 1.
	 * NOTE(review): isc_nfl is presumably the number of Rx free lists per
	 * queue set - confirm against the iflib documentation.
	 */
	.isc_nfl = 1,
	.isc_ntxqs = 1,
	.isc_nrxqs = 1,

	/* A single administrative interrupt is used by this driver */
	.isc_admin_intrcnt = 1,
	.isc_vendor_info = ice_vendor_info_array,
	.isc_driver_version = __DECONST(char *, ice_driver_version),
	.isc_driver = &ice_iflib_driver,

	/*
	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
	 * for hardware checksum offload
	 *
	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
	 * IP sum field, required by our hardware to calculate valid TSO
	 * checksums.
	 *
	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
	 * even when the interface is down.
	 *
	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
	 * vectors manually instead of relying on iflib code to do this.
	 */
	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,

	/*
	 * Descriptor ring limits. Only the first array element is given
	 * explicitly, matching the single Tx/Rx queue per set declared above.
	 */
	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
};
267 
/**
 * @var ice_devclass
 * @brief ice driver device class
 *
 * device class used to setup the ice driver module kobject class.
 */
devclass_t ice_devclass;
/*
 * Register ice_driver (the generic device method table) under the "pci" bus
 * name, with ice_module_event_handler as the module event handler.
 */
DRIVER_MODULE(ice, pci, ice_driver, ice_devclass, ice_module_event_handler, 0);

/*
 * Module version and dependencies: pci, ether, iflib and firmware, each
 * declared with all three version arguments set to 1.
 */
MODULE_VERSION(ice, 1);
MODULE_DEPEND(ice, pci, 1, 1, 1);
MODULE_DEPEND(ice, ether, 1, 1, 1);
MODULE_DEPEND(ice, iflib, 1, 1, 1);
MODULE_DEPEND(ice, firmware, 1, 1, 1);

/* Export PNP information built from the same table used in ice_sctx */
IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);
284 
285 /* Static driver-wide sysctls */
286 #include "ice_iflib_sysctls.h"
287 
288 /**
289  * ice_pci_mapping - Map PCI BAR memory
290  * @sc: device private softc
291  *
292  * Map PCI BAR 0 for device operation.
293  */
294 static int
295 ice_pci_mapping(struct ice_softc *sc)
296 {
297 	int rc;
298 
299 	/* Map BAR0 */
300 	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
301 	if (rc)
302 		return rc;
303 
304 	return 0;
305 }
306 
307 /**
308  * ice_free_pci_mapping - Release PCI BAR memory
309  * @sc: device private softc
310  *
311  * Release PCI BARs which were previously mapped by ice_pci_mapping().
312  */
313 static void
314 ice_free_pci_mapping(struct ice_softc *sc)
315 {
316 	/* Free BAR0 */
317 	ice_free_bar(sc->dev, &sc->bar0);
318 }
319 
320 /*
321  * Device methods
322  */
323 
324 /**
325  * ice_register - register device method callback
326  * @dev: the device being registered
327  *
328  * Returns a pointer to the shared context structure, which is used by iflib.
329  */
330 static void *
331 ice_register(device_t dev __unused)
332 {
333 	return &ice_sctx;
334 } /* ice_register */
335 
336 /**
337  * ice_setup_scctx - Setup the iflib softc context structure
338  * @sc: the device private structure
339  *
340  * Setup the parameters in if_softc_ctx_t structure used by the iflib stack
341  * when loading.
342  */
343 static void
344 ice_setup_scctx(struct ice_softc *sc)
345 {
346 	if_softc_ctx_t scctx = sc->scctx;
347 	struct ice_hw *hw = &sc->hw;
348 	bool safe_mode, recovery_mode;
349 
350 	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
351 	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);
352 
353 	/*
354 	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
355 	 * a single queue pair.
356 	 */
357 	if (safe_mode || recovery_mode) {
358 		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
359 		scctx->isc_ntxqsets_max = 1;
360 		scctx->isc_nrxqsets_max = 1;
361 	} else {
362 		/*
363 		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
364 		 * the values of the override sysctls. Cache these initial
365 		 * values so that the driver can be aware of what the iflib
366 		 * sysctl value is when setting up MSI-X vectors.
367 		 */
368 		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
369 		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;
370 
371 		if (scctx->isc_ntxqsets == 0)
372 			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
373 		if (scctx->isc_nrxqsets == 0)
374 			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;
375 
376 		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
377 		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;
378 
379 		/*
380 		 * Sanity check that the iflib sysctl values are within the
381 		 * maximum supported range.
382 		 */
383 		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
384 			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
385 		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
386 			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
387 	}
388 
389 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
390 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
391 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
392 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
393 
394 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
395 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
396 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
397 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
398 
399 	scctx->isc_msix_bar = PCIR_BAR(ICE_MSIX_BAR);
400 	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;
401 
402 	/*
403 	 * If the driver loads in recovery mode, disable Tx/Rx functionality
404 	 */
405 	if (recovery_mode)
406 		scctx->isc_txrx = &ice_recovery_txrx;
407 	else
408 		scctx->isc_txrx = &ice_txrx;
409 
410 	/*
411 	 * If the driver loads in Safe mode or Recovery mode, disable
412 	 * advanced features including hardware offloads.
413 	 */
414 	if (safe_mode || recovery_mode) {
415 		scctx->isc_capenable = ICE_SAFE_CAPS;
416 		scctx->isc_tx_csum_flags = 0;
417 	} else {
418 		scctx->isc_capenable = ICE_FULL_CAPS;
419 		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
420 	}
421 
422 	scctx->isc_capabilities = scctx->isc_capenable;
423 } /* ice_setup_scctx */
424 
425 /**
426  * ice_if_attach_pre - Early device attach logic
427  * @ctx: the iflib context structure
428  *
429  * Called by iflib during the attach process. Earliest main driver entry
430  * point which performs necessary hardware and driver initialization. Called
431  * before the Tx and Rx queues are allocated.
432  */
433 static int
434 ice_if_attach_pre(if_ctx_t ctx)
435 {
436 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
437 	enum ice_fw_modes fw_mode;
438 	enum ice_status status;
439 	if_softc_ctx_t scctx;
440 	struct ice_hw *hw;
441 	device_t dev;
442 	int err;
443 
444 	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");
445 
446 	sc->ctx = ctx;
447 	sc->media = iflib_get_media(ctx);
448 	sc->sctx = iflib_get_sctx(ctx);
449 	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);
450 
451 	dev = sc->dev = iflib_get_dev(ctx);
452 	scctx = sc->scctx = iflib_get_softc_ctx(ctx);
453 
454 	hw = &sc->hw;
455 	hw->back = sc;
456 
457 	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
458 		 "%s:admin", device_get_nameunit(dev));
459 	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
460 	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);
461 
462 	ASSERT_CTX_LOCKED(sc);
463 
464 	if (ice_pci_mapping(sc)) {
465 		err = (ENXIO);
466 		goto destroy_admin_timer;
467 	}
468 
469 	/* Save off the PCI information */
470 	ice_save_pci_info(hw, dev);
471 
472 	/* create tunables as early as possible */
473 	ice_add_device_tunables(sc);
474 
475 	/* Setup ControlQ lengths */
476 	ice_set_ctrlq_len(hw);
477 
478 	fw_mode = ice_get_fw_mode(hw);
479 	if (fw_mode == ICE_FW_MODE_REC) {
480 		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
481 
482 		err = ice_attach_pre_recovery_mode(sc);
483 		if (err)
484 			goto free_pci_mapping;
485 
486 		return (0);
487 	}
488 
489 	/* Initialize the hw data structure */
490 	status = ice_init_hw(hw);
491 	if (status) {
492 		if (status == ICE_ERR_FW_API_VER) {
493 			/* Enter recovery mode, so that the driver remains
494 			 * loaded. This way, if the system administrator
495 			 * cannot update the driver, they may still attempt to
496 			 * downgrade the NVM.
497 			 */
498 			err = ice_attach_pre_recovery_mode(sc);
499 			if (err)
500 				goto free_pci_mapping;
501 
502 			return (0);
503 		} else {
504 			err = EIO;
505 			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
506 				      ice_status_str(status),
507 				      ice_aq_str(hw->adminq.sq_last_status));
508 		}
509 		goto free_pci_mapping;
510 	}
511 
512 	/* Notify firmware of the device driver version */
513 	err = ice_send_version(sc);
514 	if (err)
515 		goto deinit_hw;
516 
517 	ice_load_pkg_file(sc);
518 
519 	err = ice_init_link_events(sc);
520 	if (err) {
521 		device_printf(dev, "ice_init_link_events failed: %s\n",
522 			      ice_err_str(err));
523 		goto deinit_hw;
524 	}
525 
526 	ice_print_nvm_version(sc);
527 
528 	ice_init_device_features(sc);
529 
530 	/* Setup the MAC address */
531 	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);
532 
533 	/* Setup the iflib softc context structure */
534 	ice_setup_scctx(sc);
535 
536 	/* Initialize the Tx queue manager */
537 	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
538 	if (err) {
539 		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
540 			      ice_err_str(err));
541 		goto deinit_hw;
542 	}
543 
544 	/* Initialize the Rx queue manager */
545 	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
546 	if (err) {
547 		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
548 			      ice_err_str(err));
549 		goto free_tx_qmgr;
550 	}
551 
552 	/* Initialize the interrupt resource manager */
553 	err = ice_alloc_intr_tracking(sc);
554 	if (err)
555 		/* Errors are already printed */
556 		goto free_rx_qmgr;
557 
558 	/* Determine maximum number of VSIs we'll prepare for */
559 	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
560 				    hw->func_caps.guar_num_vsi);
561 
562 	if (!sc->num_available_vsi) {
563 		err = EIO;
564 		device_printf(dev, "No VSIs allocated to host\n");
565 		goto free_intr_tracking;
566 	}
567 
568 	/* Allocate storage for the VSI pointers */
569 	sc->all_vsi = (struct ice_vsi **)
570 		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
571 		       M_ICE, M_WAITOK | M_ZERO);
572 	if (!sc->all_vsi) {
573 		err = ENOMEM;
574 		device_printf(dev, "Unable to allocate VSI array\n");
575 		goto free_intr_tracking;
576 	}
577 
578 	/*
579 	 * Prepare the statically allocated primary PF VSI in the softc
580 	 * structure. Other VSIs will be dynamically allocated as needed.
581 	 */
582 	ice_setup_pf_vsi(sc);
583 
584 	err = ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
585 	    scctx->isc_nrxqsets_max);
586 	if (err) {
587 		device_printf(dev, "Unable to allocate VSI Queue maps\n");
588 		goto free_main_vsi;
589 	}
590 
591 	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
592 	err = ice_allocate_msix(sc);
593 	if (err)
594 		goto free_main_vsi;
595 
596 	return 0;
597 
598 free_main_vsi:
599 	/* ice_release_vsi will free the queue maps if they were allocated */
600 	ice_release_vsi(&sc->pf_vsi);
601 	free(sc->all_vsi, M_ICE);
602 	sc->all_vsi = NULL;
603 free_intr_tracking:
604 	ice_free_intr_tracking(sc);
605 free_rx_qmgr:
606 	ice_resmgr_destroy(&sc->rx_qmgr);
607 free_tx_qmgr:
608 	ice_resmgr_destroy(&sc->tx_qmgr);
609 deinit_hw:
610 	ice_deinit_hw(hw);
611 free_pci_mapping:
612 	ice_free_pci_mapping(sc);
613 destroy_admin_timer:
614 	mtx_lock(&sc->admin_mtx);
615 	callout_stop(&sc->admin_timer);
616 	mtx_unlock(&sc->admin_mtx);
617 	mtx_destroy(&sc->admin_mtx);
618 	return err;
619 } /* ice_if_attach_pre */
620 
621 /**
622  * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
623  * @sc: the device private softc
624  *
625  * Loads the device driver in limited Firmware Recovery mode, intended to
626  * allow users to update the firmware to attempt to recover the device.
627  *
628  * @remark We may enter recovery mode in case either (a) the firmware is
629  * detected to be in an invalid state and must be re-programmed, or (b) the
630  * driver detects that the loaded firmware has a non-compatible API version
631  * that the driver cannot operate with.
632  */
633 static int
634 ice_attach_pre_recovery_mode(struct ice_softc *sc)
635 {
636 	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);
637 
638 	/* Setup the iflib softc context */
639 	ice_setup_scctx(sc);
640 
641 	/* Setup the PF VSI back pointer */
642 	sc->pf_vsi.sc = sc;
643 
644 	/*
645 	 * We still need to allocate MSI-X vectors since we need one vector to
646 	 * run the administrative admin interrupt
647 	 */
648 	return ice_allocate_msix(sc);
649 }
650 
651 /**
652  * ice_update_link_status - notify OS of link state change
653  * @sc: device private softc structure
654  * @update_media: true if we should update media even if link didn't change
655  *
656  * Called to notify iflib core of link status changes. Should be called once
657  * during attach_post, and whenever link status changes during runtime.
658  *
659  * This call only updates the currently supported media types if the link
660  * status changed, or if update_media is set to true.
661  */
662 static void
663 ice_update_link_status(struct ice_softc *sc, bool update_media)
664 {
665 	struct ice_hw *hw = &sc->hw;
666 
667 	/* Never report link up when in recovery mode */
668 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
669 		return;
670 
671 	/* Report link status to iflib only once each time it changes */
672 	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
673 		if (sc->link_up) { /* link is up */
674 			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);
675 
676 			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
677 
678 			ice_link_up_msg(sc);
679 
680 			update_media = true;
681 		} else { /* link is down */
682 			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
683 
684 			update_media = true;
685 		}
686 	}
687 
688 	/* Update the supported media types */
689 	if (update_media) {
690 		enum ice_status status = ice_add_media_types(sc, sc->media);
691 		if (status)
692 			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
693 				      ice_status_str(status),
694 				      ice_aq_str(hw->adminq.sq_last_status));
695 	}
696 
697 	/* TODO: notify VFs of link state change */
698 }
699 
700 /**
701  * ice_if_attach_post - Late device attach logic
702  * @ctx: the iflib context structure
703  *
704  * Called by iflib to finish up attaching the device. Performs any attach
705  * logic which must wait until after the Tx and Rx queues have been
706  * allocated.
707  */
708 static int
709 ice_if_attach_post(if_ctx_t ctx)
710 {
711 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
712 	if_t ifp = iflib_get_ifp(ctx);
713 	int err;
714 
715 	ASSERT_CTX_LOCKED(sc);
716 
717 	/* We don't yet support loading if MSI-X is not supported */
718 	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
719 		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
720 		return (ENOTSUP);
721 	}
722 
723 	/* The ifnet structure hasn't yet been initialized when the attach_pre
724 	 * handler is called, so wait until attach_post to setup the
725 	 * isc_max_frame_size.
726 	 */
727 
728 	sc->ifp = ifp;
729 	sc->scctx->isc_max_frame_size = ifp->if_mtu +
730 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
731 
732 	/*
733 	 * If we are in recovery mode, only perform a limited subset of
734 	 * initialization to support NVM recovery.
735 	 */
736 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
737 		ice_attach_post_recovery_mode(sc);
738 		return (0);
739 	}
740 
741 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
742 
743 	err = ice_initialize_vsi(&sc->pf_vsi);
744 	if (err) {
745 		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
746 			      ice_err_str(err));
747 		return err;
748 	}
749 
750 	/* Configure the main PF VSI for RSS */
751 	err = ice_config_rss(&sc->pf_vsi);
752 	if (err) {
753 		device_printf(sc->dev,
754 			      "Unable to configure RSS for the main VSI, err %s\n",
755 			      ice_err_str(err));
756 		return err;
757 	}
758 
759 	/* Configure switch to drop transmitted LLDP and PAUSE frames */
760 	err = ice_cfg_pf_ethertype_filters(sc);
761 	if (err)
762 		return err;
763 
764 	ice_get_and_print_bus_info(sc);
765 
766 	ice_set_link_management_mode(sc);
767 
768 	ice_init_saved_phy_cfg(sc);
769 
770 	ice_add_device_sysctls(sc);
771 
772 	/* Get DCBX/LLDP state and start DCBX agent */
773 	ice_init_dcb_setup(sc);
774 
775 	/* Setup link configuration parameters */
776 	ice_init_link_configuration(sc);
777 	ice_update_link_status(sc, true);
778 
779 	/* Configure interrupt causes for the administrative interrupt */
780 	ice_configure_misc_interrupts(sc);
781 
782 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
783 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
784 
785 	/* Start the admin timer */
786 	mtx_lock(&sc->admin_mtx);
787 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
788 	mtx_unlock(&sc->admin_mtx);
789 
790 	return 0;
791 } /* ice_if_attach_post */
792 
793 /**
794  * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
795  * @sc: the device private softc
796  *
797  * Performs minimal work to prepare the driver to recover an NVM in case the
798  * firmware is in recovery mode.
799  */
800 static void
801 ice_attach_post_recovery_mode(struct ice_softc *sc)
802 {
803 	/* Configure interrupt causes for the administrative interrupt */
804 	ice_configure_misc_interrupts(sc);
805 
806 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
807 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
808 
809 	/* Start the admin timer */
810 	mtx_lock(&sc->admin_mtx);
811 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
812 	mtx_unlock(&sc->admin_mtx);
813 }
814 
815 /**
816  * ice_free_irqvs - Free IRQ vector memory
817  * @sc: the device private softc structure
818  *
819  * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
820  */
821 static void
822 ice_free_irqvs(struct ice_softc *sc)
823 {
824 	struct ice_vsi *vsi = &sc->pf_vsi;
825 	if_ctx_t ctx = sc->ctx;
826 	int i;
827 
828 	/* If the irqvs array is NULL, then there are no vectors to free */
829 	if (sc->irqvs == NULL)
830 		return;
831 
832 	/* Free the IRQ vectors */
833 	for (i = 0; i < sc->num_irq_vectors; i++)
834 		iflib_irq_free(ctx, &sc->irqvs[i].irq);
835 
836 	/* Clear the irqv pointers */
837 	for (i = 0; i < vsi->num_rx_queues; i++)
838 		vsi->rx_queues[i].irqv = NULL;
839 
840 	for (i = 0; i < vsi->num_tx_queues; i++)
841 		vsi->tx_queues[i].irqv = NULL;
842 
843 	/* Release the vector array memory */
844 	free(sc->irqvs, M_ICE);
845 	sc->irqvs = NULL;
846 	sc->num_irq_vectors = 0;
847 }
848 
849 /**
850  * ice_if_detach - Device driver detach logic
851  * @ctx: iflib context structure
852  *
853  * Perform device shutdown logic to detach the device driver.
854  *
855  * Note that there is no guarantee of the ordering of ice_if_queues_free() and
856  * ice_if_detach(). It is possible for the functions to be called in either
857  * order, and they must not assume to have a strict ordering.
858  */
859 static int
860 ice_if_detach(if_ctx_t ctx)
861 {
862 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
863 	struct ice_vsi *vsi = &sc->pf_vsi;
864 	int i;
865 
866 	ASSERT_CTX_LOCKED(sc);
867 
868 	/* Indicate that we're detaching */
869 	ice_set_state(&sc->state, ICE_STATE_DETACHING);
870 
871 	/* Stop the admin timer */
872 	mtx_lock(&sc->admin_mtx);
873 	callout_stop(&sc->admin_timer);
874 	mtx_unlock(&sc->admin_mtx);
875 	mtx_destroy(&sc->admin_mtx);
876 
877 	/* Free allocated media types */
878 	ifmedia_removeall(sc->media);
879 
880 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
881 	 * pointers. Note, the calls here and those in ice_if_queues_free()
882 	 * are *BOTH* necessary, as we cannot guarantee which path will be
883 	 * run first
884 	 */
885 	ice_vsi_del_txqs_ctx(vsi);
886 	ice_vsi_del_rxqs_ctx(vsi);
887 
888 	/* Release MSI-X resources */
889 	ice_free_irqvs(sc);
890 
891 	for (i = 0; i < sc->num_available_vsi; i++) {
892 		if (sc->all_vsi[i])
893 			ice_release_vsi(sc->all_vsi[i]);
894 	}
895 
896 	if (sc->all_vsi) {
897 		free(sc->all_vsi, M_ICE);
898 		sc->all_vsi = NULL;
899 	}
900 
901 	/* Release MSI-X memory */
902 	pci_release_msi(sc->dev);
903 
904 	if (sc->msix_table != NULL) {
905 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
906 				     rman_get_rid(sc->msix_table),
907 				     sc->msix_table);
908 		sc->msix_table = NULL;
909 	}
910 
911 	ice_free_intr_tracking(sc);
912 
913 	/* Destroy the queue managers */
914 	ice_resmgr_destroy(&sc->tx_qmgr);
915 	ice_resmgr_destroy(&sc->rx_qmgr);
916 
917 	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
918 		ice_deinit_hw(&sc->hw);
919 
920 	ice_free_pci_mapping(sc);
921 
922 	return 0;
923 } /* ice_if_detach */
924 
925 /**
926  * ice_if_tx_queues_alloc - Allocate Tx queue memory
927  * @ctx: iflib context structure
928  * @vaddrs: virtual addresses for the queue memory
929  * @paddrs: physical addresses for the queue memory
930  * @ntxqs: the number of Tx queues per set (should always be 1)
931  * @ntxqsets: the number of Tx queue sets to allocate
932  *
933  * Called by iflib to allocate Tx queues for the device. Allocates driver
934  * memory to track each queue, the status arrays used for descriptor
935  * status reporting, and Tx queue sysctls.
936  */
937 static int
938 ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
939 		       int __invariant_only ntxqs, int ntxqsets)
940 {
941 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
942 	struct ice_vsi *vsi = &sc->pf_vsi;
943 	struct ice_tx_queue *txq;
944 	int err, i, j;
945 
946 	MPASS(ntxqs == 1);
947 	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
948 	ASSERT_CTX_LOCKED(sc);
949 
950 	/* Do not bother allocating queues if we're in recovery mode */
951 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
952 		return (0);
953 
954 	/* Allocate queue structure memory */
955 	if (!(vsi->tx_queues =
956 	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_WAITOK | M_ZERO))) {
957 		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
958 		return (ENOMEM);
959 	}
960 
961 	/* Allocate report status arrays */
962 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
963 		if (!(txq->tx_rsq =
964 		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_WAITOK))) {
965 			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
966 			err = ENOMEM;
967 			goto free_tx_queues;
968 		}
969 		/* Initialize report status array */
970 		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
971 			txq->tx_rsq[j] = QIDX_INVALID;
972 	}
973 
974 	/* Assign queues from PF space to the main VSI */
975 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
976 	if (err) {
977 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
978 			      ice_err_str(err));
979 		goto free_tx_queues;
980 	}
981 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
982 
983 	/* Add Tx queue sysctls context */
984 	ice_vsi_add_txqs_ctx(vsi);
985 
986 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
987 		txq->me = i;
988 		txq->vsi = vsi;
989 
990 		/* store the queue size for easier access */
991 		txq->desc_count = sc->scctx->isc_ntxd[0];
992 
993 		/* get the virtual and physical address of the hardware queues */
994 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
995 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
996 		txq->tx_paddr = paddrs[i];
997 
998 		ice_add_txq_sysctls(txq);
999 	}
1000 
1001 	vsi->num_tx_queues = ntxqsets;
1002 
1003 	return (0);
1004 
1005 free_tx_queues:
1006 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1007 		if (txq->tx_rsq != NULL) {
1008 			free(txq->tx_rsq, M_ICE);
1009 			txq->tx_rsq = NULL;
1010 		}
1011 	}
1012 	free(vsi->tx_queues, M_ICE);
1013 	vsi->tx_queues = NULL;
1014 	return err;
1015 }
1016 
1017 /**
1018  * ice_if_rx_queues_alloc - Allocate Rx queue memory
1019  * @ctx: iflib context structure
1020  * @vaddrs: virtual addresses for the queue memory
1021  * @paddrs: physical addresses for the queue memory
1022  * @nrxqs: number of Rx queues per set (should always be 1)
1023  * @nrxqsets: number of Rx queue sets to allocate
1024  *
1025  * Called by iflib to allocate Rx queues for the device. Allocates driver
1026  * memory to track each queue, as well as sets up the Rx queue sysctls.
1027  */
1028 static int
1029 ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1030 		       int __invariant_only nrxqs, int nrxqsets)
1031 {
1032 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1033 	struct ice_vsi *vsi = &sc->pf_vsi;
1034 	struct ice_rx_queue *rxq;
1035 	int err, i;
1036 
1037 	MPASS(nrxqs == 1);
1038 	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
1039 	ASSERT_CTX_LOCKED(sc);
1040 
1041 	/* Do not bother allocating queues if we're in recovery mode */
1042 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1043 		return (0);
1044 
1045 	/* Allocate queue structure memory */
1046 	if (!(vsi->rx_queues =
1047 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_WAITOK | M_ZERO))) {
1048 		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
1049 		return (ENOMEM);
1050 	}
1051 
1052 	/* Assign queues from PF space to the main VSI */
1053 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
1054 	if (err) {
1055 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1056 			      ice_err_str(err));
1057 		goto free_rx_queues;
1058 	}
1059 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1060 
1061 	/* Add Rx queue sysctls context */
1062 	ice_vsi_add_rxqs_ctx(vsi);
1063 
1064 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
1065 		rxq->me = i;
1066 		rxq->vsi = vsi;
1067 
1068 		/* store the queue size for easier access */
1069 		rxq->desc_count = sc->scctx->isc_nrxd[0];
1070 
1071 		/* get the virtual and physical address of the hardware queues */
1072 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
1073 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
1074 		rxq->rx_paddr = paddrs[i];
1075 
1076 		ice_add_rxq_sysctls(rxq);
1077 	}
1078 
1079 	vsi->num_rx_queues = nrxqsets;
1080 
1081 	return (0);
1082 
1083 free_rx_queues:
1084 	free(vsi->rx_queues, M_ICE);
1085 	vsi->rx_queues = NULL;
1086 	return err;
1087 }
1088 
1089 /**
1090  * ice_if_queues_free - Free queue memory
1091  * @ctx: the iflib context structure
1092  *
1093  * Free queue memory allocated by ice_if_tx_queues_alloc() and
1094  * ice_if_rx_queues_alloc().
1095  *
1096  * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
1097  * called in the same order. It's possible for ice_if_queues_free() to be
1098  * called prior to ice_if_detach(), and vice versa.
1099  *
1100  * For this reason, the main VSI is a static member of the ice_softc, which is
1101  * not free'd until after iflib finishes calling both of these functions.
1102  *
1103  * Thus, care must be taken in how we manage the memory being freed by this
1104  * function, and in what tasks it can and must perform.
1105  */
1106 static void
1107 ice_if_queues_free(if_ctx_t ctx)
1108 {
1109 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1110 	struct ice_vsi *vsi = &sc->pf_vsi;
1111 	struct ice_tx_queue *txq;
1112 	int i;
1113 
1114 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
1115 	 * pointers. Note, the calls here and those in ice_if_detach()
1116 	 * are *BOTH* necessary, as we cannot guarantee which path will be
1117 	 * run first
1118 	 */
1119 	ice_vsi_del_txqs_ctx(vsi);
1120 	ice_vsi_del_rxqs_ctx(vsi);
1121 
1122 	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
1123 	ice_free_irqvs(sc);
1124 
1125 	if (vsi->tx_queues != NULL) {
1126 		/* free the tx_rsq arrays */
1127 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1128 			if (txq->tx_rsq != NULL) {
1129 				free(txq->tx_rsq, M_ICE);
1130 				txq->tx_rsq = NULL;
1131 			}
1132 		}
1133 		free(vsi->tx_queues, M_ICE);
1134 		vsi->tx_queues = NULL;
1135 		vsi->num_tx_queues = 0;
1136 	}
1137 	if (vsi->rx_queues != NULL) {
1138 		free(vsi->rx_queues, M_ICE);
1139 		vsi->rx_queues = NULL;
1140 		vsi->num_rx_queues = 0;
1141 	}
1142 }
1143 
1144 /**
1145  * ice_msix_que - Fast interrupt handler for MSI-X receive queues
1146  * @arg: The Rx queue memory
1147  *
1148  * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
1149  * an MSI-X interrupt for a given queue is triggered. Currently this just asks
1150  * iflib to schedule the main Rx thread.
1151  */
1152 static int
1153 ice_msix_que(void *arg)
1154 {
1155 	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;
1156 
1157 	/* TODO: dynamic ITR algorithm?? */
1158 
1159 	return (FILTER_SCHEDULE_THREAD);
1160 }
1161 
1162 /**
1163  * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
1164  * @arg: pointer to device softc memory
1165  *
1166  * Called by iflib when an administrative interrupt occurs. Should perform any
1167  * fast logic for handling the interrupt cause, and then indicate whether the
1168  * admin task needs to be queued.
1169  */
1170 static int
1171 ice_msix_admin(void *arg)
1172 {
1173 	struct ice_softc *sc = (struct ice_softc *)arg;
1174 	struct ice_hw *hw = &sc->hw;
1175 	device_t dev = sc->dev;
1176 	u32 oicr;
1177 
1178 	/* There is no safe way to modify the enabled miscellaneous causes of
1179 	 * the OICR vector at runtime, as doing so would be prone to race
1180 	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
1181 	 * causes and allow future interrupts to occur. The admin interrupt
1182 	 * vector will not be re-enabled until after we exit this function,
1183 	 * but any delayed tasks must be resilient against possible "late
1184 	 * arrival" interrupts that occur while we're already handling the
1185 	 * task. This is done by using state bits and serializing these
1186 	 * delayed tasks via the admin status task function.
1187 	 */
1188 	oicr = rd32(hw, PFINT_OICR);
1189 
1190 	/* Processing multiple controlq interrupts on a single vector does not
1191 	 * provide an indication of which controlq triggered the interrupt.
1192 	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
1193 	 * registers. However, the INTEVENT bit is not guaranteed to be set as
1194 	 * it gets automatically cleared when the hardware acknowledges the
1195 	 * interrupt.
1196 	 *
1197 	 * This means we don't really have a good indication of whether or
1198 	 * which controlq triggered this interrupt. We'll just notify the
1199 	 * admin task that it should check all the controlqs.
1200 	 */
1201 	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
1202 
1203 	if (oicr & PFINT_OICR_VFLR_M) {
1204 		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
1205 	}
1206 
1207 	if (oicr & PFINT_OICR_MAL_DETECT_M) {
1208 		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
1209 	}
1210 
1211 	if (oicr & PFINT_OICR_GRST_M) {
1212 		u32 reset;
1213 
1214 		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
1215 			GLGEN_RSTAT_RESET_TYPE_S;
1216 
1217 		if (reset == ICE_RESET_CORER)
1218 			sc->soft_stats.corer_count++;
1219 		else if (reset == ICE_RESET_GLOBR)
1220 			sc->soft_stats.globr_count++;
1221 		else
1222 			sc->soft_stats.empr_count++;
1223 
1224 		/* There are a couple of bits at play for handling resets.
1225 		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
1226 		 * indicate that the driver has received an OICR with a reset
1227 		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
1228 		 * happen. Second, we set hw->reset_ongoing to indicate that
1229 		 * the hardware is in reset. We will set this back to false as
1230 		 * soon as the driver has determined that the hardware is out
1231 		 * of reset.
1232 		 *
1233 		 * If the driver wishes to trigger a reqest, it can set one of
1234 		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
1235 		 * correct type of reset.
1236 		 */
1237 		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
1238 			hw->reset_ongoing = true;
1239 	}
1240 
1241 	if (oicr & PFINT_OICR_ECC_ERR_M) {
1242 		device_printf(dev, "ECC Error detected!\n");
1243 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1244 	}
1245 
1246 	if (oicr & PFINT_OICR_PE_CRITERR_M) {
1247 		device_printf(dev, "Critical Protocol Engine Error detected!\n");
1248 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1249 	}
1250 
1251 	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
1252 		device_printf(dev, "PCI Exception detected!\n");
1253 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1254 	}
1255 
1256 	if (oicr & PFINT_OICR_HMC_ERR_M) {
1257 		/* Log the HMC errors, but don't disable the interrupt cause */
1258 		ice_log_hmc_error(hw, dev);
1259 	}
1260 
1261 	return (FILTER_SCHEDULE_THREAD);
1262 }
1263 
1264 /**
1265  * ice_allocate_msix - Allocate MSI-X vectors for the interface
1266  * @sc: the device private softc
1267  *
1268  * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
1269  *
1270  * First, determine a suitable total number of vectors based on the number
1271  * of CPUs, RSS buckets, the administrative vector, and other demands such as
1272  * RDMA.
1273  *
1274  * Request the desired amount of vectors, and see how many we obtain. If we
1275  * don't obtain as many as desired, reduce the demands by lowering the number
1276  * of requested queues or reducing the demand from other features such as
1277  * RDMA.
1278  *
1279  * @remark This function is required because the driver sets the
1280  * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
1281  * manually.
1282  *
1283  * @remark This driver will only use MSI-X vectors. If this is not possible,
1284  * neither MSI or legacy interrupts will be tried.
1285  *
1286  * @post on success this function must set the following scctx parameters:
1287  * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
1288  *
1289  * @returns zero on success or an error code on failure.
1290  */
1291 static int
1292 ice_allocate_msix(struct ice_softc *sc)
1293 {
1294 	bool iflib_override_queue_count = false;
1295 	if_softc_ctx_t scctx = sc->scctx;
1296 	device_t dev = sc->dev;
1297 	cpuset_t cpus;
1298 	int bar, queues, vectors, requested;
1299 	int err = 0;
1300 
1301 	/* Allocate the MSI-X bar */
1302 	bar = scctx->isc_msix_bar;
1303 	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
1304 	if (!sc->msix_table) {
1305 		device_printf(dev, "Unable to map MSI-X table\n");
1306 		return (ENOMEM);
1307 	}
1308 
1309 	/* Check if the iflib queue count sysctls have been set */
1310 	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
1311 		iflib_override_queue_count = true;
1312 
1313 	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
1314 	if (err) {
1315 		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
1316 			      __func__, ice_err_str(err));
1317 		CPU_COPY(&all_cpus, &cpus);
1318 	}
1319 
1320 	/* Attempt to mimic behavior of iflib_msix_init */
1321 	if (iflib_override_queue_count) {
1322 		/*
1323 		 * If the override sysctls have been set, limit the queues to
1324 		 * the number of logical CPUs.
1325 		 */
1326 		queues = mp_ncpus;
1327 	} else {
1328 		/*
1329 		 * Otherwise, limit the queue count to the CPUs associated
1330 		 * with the NUMA node the device is associated with.
1331 		 */
1332 		queues = CPU_COUNT(&cpus);
1333 	}
1334 
1335 	/* Clamp to the number of RSS buckets */
1336 	queues = imin(queues, rss_getnumbuckets());
1337 
1338 	/*
1339 	 * Clamp the number of queue pairs to the minimum of the requested Tx
1340 	 * and Rx queues.
1341 	 */
1342 	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
1343 	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
1344 
1345 	/*
1346 	 * Determine the number of vectors to request. Note that we also need
1347 	 * to allocate one vector for administrative tasks.
1348 	 */
1349 	requested = queues + 1;
1350 
1351 	vectors = requested;
1352 
1353 	err = pci_alloc_msix(dev, &vectors);
1354 	if (err) {
1355 		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
1356 			      vectors, ice_err_str(err));
1357 		goto err_free_msix_table;
1358 	}
1359 
1360 	/* If we don't receive enough vectors, reduce demands */
1361 	if (vectors < requested) {
1362 		int diff = requested - vectors;
1363 
1364 		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
1365 			      requested, vectors);
1366 
1367 		/*
1368 		 * If we still have a difference, we need to reduce the number
1369 		 * of queue pairs.
1370 		 *
1371 		 * However, we still need at least one vector for the admin
1372 		 * interrupt and one queue pair.
1373 		 */
1374 		if (queues <= diff) {
1375 			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
1376 			err = (ERANGE);
1377 			goto err_pci_release_msi;
1378 		}
1379 
1380 		queues -= diff;
1381 	}
1382 
1383 	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
1384 	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
1385 		      vectors);
1386 
1387 	scctx->isc_vectors = vectors;
1388 	scctx->isc_nrxqsets = queues;
1389 	scctx->isc_ntxqsets = queues;
1390 	scctx->isc_intr = IFLIB_INTR_MSIX;
1391 
1392 	/* Interrupt allocation tracking isn't required in recovery mode,
1393 	 * since neither RDMA nor VFs are enabled.
1394 	 */
1395 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1396 		return (0);
1397 
1398 	/* Keep track of which interrupt indices are being used for what */
1399 	sc->lan_vectors = vectors;
1400 	err = ice_resmgr_assign_contiguous(&sc->imgr, sc->pf_imap, sc->lan_vectors);
1401 	if (err) {
1402 		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
1403 			      ice_err_str(err));
1404 		goto err_pci_release_msi;
1405 	}
1406 
1407 	return (0);
1408 
1409 err_pci_release_msi:
1410 	pci_release_msi(dev);
1411 err_free_msix_table:
1412 	if (sc->msix_table != NULL) {
1413 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
1414 				rman_get_rid(sc->msix_table),
1415 				sc->msix_table);
1416 		sc->msix_table = NULL;
1417 	}
1418 
1419 	return (err);
1420 }
1421 
1422 /**
1423  * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
1424  * @ctx: the iflib context structure
1425  * @msix: the number of vectors we were assigned
1426  *
1427  * Called by iflib to assign MSI-X vectors to queues. Currently requires that
1428  * we get at least the same number of vectors as we have queues, and that we
1429  * always have the same number of Tx and Rx queues.
1430  *
1431  * Tx queues use a softirq instead of using their own hardware interrupt.
1432  */
1433 static int
1434 ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
1435 {
1436 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1437 	struct ice_vsi *vsi = &sc->pf_vsi;
1438 	int err, i, vector;
1439 
1440 	ASSERT_CTX_LOCKED(sc);
1441 
1442 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
1443 		device_printf(sc->dev,
1444 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
1445 			      vsi->num_tx_queues, vsi->num_rx_queues);
1446 		return (EOPNOTSUPP);
1447 	}
1448 
1449 	if (msix < (vsi->num_rx_queues + 1)) {
1450 		device_printf(sc->dev,
1451 			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
1452 		return (EOPNOTSUPP);
1453 	}
1454 
1455 	/* Save the number of vectors for future use */
1456 	sc->num_irq_vectors = vsi->num_rx_queues + 1;
1457 
1458 	/* Allocate space to store the IRQ vector data */
1459 	if (!(sc->irqvs =
1460 	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
1461 					       M_ICE, M_NOWAIT))) {
1462 		device_printf(sc->dev,
1463 			      "Unable to allocate irqv memory\n");
1464 		return (ENOMEM);
1465 	}
1466 
1467 	/* Administrative interrupt events will use vector 0 */
1468 	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
1469 				      ice_msix_admin, sc, 0, "admin");
1470 	if (err) {
1471 		device_printf(sc->dev,
1472 			      "Failed to register Admin queue handler: %s\n",
1473 			      ice_err_str(err));
1474 		goto free_irqvs;
1475 	}
1476 	sc->irqvs[0].me = 0;
1477 
1478 	/* Do not allocate queue interrupts when in recovery mode */
1479 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1480 		return (0);
1481 
1482 	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
1483 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1484 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1485 		int rid = vector + 1;
1486 		char irq_name[16];
1487 
1488 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
1489 		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
1490 					      IFLIB_INTR_RX, ice_msix_que,
1491 					      rxq, rxq->me, irq_name);
1492 		if (err) {
1493 			device_printf(sc->dev,
1494 				      "Failed to allocate q int %d err: %s\n",
1495 				      i, ice_err_str(err));
1496 			vector--;
1497 			i--;
1498 			goto fail;
1499 		}
1500 		sc->irqvs[vector].me = vector;
1501 		rxq->irqv = &sc->irqvs[vector];
1502 
1503 		bzero(irq_name, sizeof(irq_name));
1504 
1505 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
1506 		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
1507 					    IFLIB_INTR_TX, txq,
1508 					    txq->me, irq_name);
1509 		txq->irqv = &sc->irqvs[vector];
1510 	}
1511 
1512 	return (0);
1513 fail:
1514 	for (; i >= 0; i--, vector--)
1515 		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
1516 	iflib_irq_free(ctx, &sc->irqvs[0].irq);
1517 free_irqvs:
1518 	free(sc->irqvs, M_ICE);
1519 	sc->irqvs = NULL;
1520 	return err;
1521 }
1522 
1523 /**
1524  * ice_if_mtu_set - Set the device MTU
1525  * @ctx: iflib context structure
1526  * @mtu: the MTU requested
1527  *
1528  * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
1529  *
1530  * @pre assumes the caller holds the iflib CTX lock
1531  */
1532 static int
1533 ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
1534 {
1535 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1536 
1537 	ASSERT_CTX_LOCKED(sc);
1538 
1539 	/* Do not support configuration when in recovery mode */
1540 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1541 		return (ENOSYS);
1542 
1543 	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
1544 		return (EINVAL);
1545 
1546 	sc->scctx->isc_max_frame_size = mtu +
1547 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1548 
1549 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
1550 
1551 	return (0);
1552 }
1553 
1554 /**
1555  * ice_if_intr_enable - Enable device interrupts
1556  * @ctx: iflib context structure
1557  *
1558  * Called by iflib to request enabling device interrupts.
1559  */
1560 static void
1561 ice_if_intr_enable(if_ctx_t ctx)
1562 {
1563 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1564 	struct ice_vsi *vsi = &sc->pf_vsi;
1565 	struct ice_hw *hw = &sc->hw;
1566 
1567 	ASSERT_CTX_LOCKED(sc);
1568 
1569 	/* Enable ITR 0 */
1570 	ice_enable_intr(hw, sc->irqvs[0].me);
1571 
1572 	/* Do not enable queue interrupts in recovery mode */
1573 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1574 		return;
1575 
1576 	/* Enable all queue interrupts */
1577 	for (int i = 0; i < vsi->num_rx_queues; i++)
1578 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
1579 }
1580 
1581 /**
1582  * ice_if_intr_disable - Disable device interrupts
1583  * @ctx: iflib context structure
1584  *
1585  * Called by iflib to request disabling device interrupts.
1586  */
1587 static void
1588 ice_if_intr_disable(if_ctx_t ctx)
1589 {
1590 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1591 	struct ice_hw *hw = &sc->hw;
1592 	unsigned int i;
1593 
1594 	ASSERT_CTX_LOCKED(sc);
1595 
1596 	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
1597 	 * assigned to queues. Instead of assuming that the interrupt
1598 	 * assignment in the rx_queues structure is valid, just disable all
1599 	 * possible interrupts
1600 	 *
1601 	 * Note that we choose not to disable ITR 0 because this handles the
1602 	 * AdminQ interrupts, and we want to keep processing these even when
1603 	 * the interface is offline.
1604 	 */
1605 	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1606 		ice_disable_intr(hw, i);
1607 }
1608 
1609 /**
1610  * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1611  * @ctx: iflib context structure
1612  * @rxqid: the Rx queue to enable
1613  *
1614  * Enable a specific Rx queue interrupt.
1615  *
1616  * This function is not protected by the iflib CTX lock.
1617  */
1618 static int
1619 ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1620 {
1621 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1622 	struct ice_vsi *vsi = &sc->pf_vsi;
1623 	struct ice_hw *hw = &sc->hw;
1624 
1625 	/* Do not enable queue interrupts in recovery mode */
1626 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1627 		return (ENOSYS);
1628 
1629 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1630 	return (0);
1631 }
1632 
1633 /**
1634  * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1635  * @ctx: iflib context structure
1636  * @txqid: the Tx queue to enable
1637  *
1638  * Enable a specific Tx queue interrupt.
1639  *
1640  * This function is not protected by the iflib CTX lock.
1641  */
1642 static int
1643 ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1644 {
1645 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1646 	struct ice_vsi *vsi = &sc->pf_vsi;
1647 	struct ice_hw *hw = &sc->hw;
1648 
1649 	/* Do not enable queue interrupts in recovery mode */
1650 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1651 		return (ENOSYS);
1652 
1653 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1654 	return (0);
1655 }
1656 
1657 /**
1658  * ice_if_promisc_set - Set device promiscuous mode
1659  * @ctx: iflib context structure
1660  * @flags: promiscuous flags to configure
1661  *
1662  * Called by iflib to configure device promiscuous mode.
1663  *
1664  * @remark Calls to this function will always overwrite the previous setting
1665  */
1666 static int
1667 ice_if_promisc_set(if_ctx_t ctx, int flags)
1668 {
1669 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1670 	struct ice_hw *hw = &sc->hw;
1671 	device_t dev = sc->dev;
1672 	enum ice_status status;
1673 	bool promisc_enable = flags & IFF_PROMISC;
1674 	bool multi_enable = flags & IFF_ALLMULTI;
1675 
1676 	/* Do not support configuration when in recovery mode */
1677 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1678 		return (ENOSYS);
1679 
1680 	if (multi_enable)
1681 		return (EOPNOTSUPP);
1682 
1683 	if (promisc_enable) {
1684 		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1685 					     ICE_VSI_PROMISC_MASK, 0);
1686 		if (status && status != ICE_ERR_ALREADY_EXISTS) {
1687 			device_printf(dev,
1688 				      "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n",
1689 				      ice_status_str(status),
1690 				      ice_aq_str(hw->adminq.sq_last_status));
1691 			return (EIO);
1692 		}
1693 	} else {
1694 		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
1695 					       ICE_VSI_PROMISC_MASK, 0);
1696 		if (status) {
1697 			device_printf(dev,
1698 				      "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n",
1699 				      ice_status_str(status),
1700 				      ice_aq_str(hw->adminq.sq_last_status));
1701 			return (EIO);
1702 		}
1703 	}
1704 
1705 	return (0);
1706 }
1707 
1708 /**
1709  * ice_if_media_change - Change device media
1710  * @ctx: device ctx structure
1711  *
1712  * Called by iflib when a media change is requested. This operation is not
1713  * supported by the hardware, so we just return an error code.
1714  */
1715 static int
1716 ice_if_media_change(if_ctx_t ctx)
1717 {
1718 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1719 
1720 	device_printf(sc->dev, "Media change is not supported.\n");
1721 	return (ENODEV);
1722 }
1723 
1724 /**
1725  * ice_if_media_status - Report current device media
1726  * @ctx: iflib context structure
1727  * @ifmr: ifmedia request structure to update
1728  *
1729  * Updates the provided ifmr with current device media status, including link
1730  * status and media type.
1731  */
1732 static void
1733 ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1734 {
1735 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1736 	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;
1737 
1738 	ifmr->ifm_status = IFM_AVALID;
1739 	ifmr->ifm_active = IFM_ETHER;
1740 
1741 	/* Never report link up or media types when in recovery mode */
1742 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1743 		return;
1744 
1745 	if (!sc->link_up)
1746 		return;
1747 
1748 	ifmr->ifm_status |= IFM_ACTIVE;
1749 	ifmr->ifm_active |= IFM_FDX;
1750 
1751 	if (li->phy_type_low)
1752 		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
1753 	else if (li->phy_type_high)
1754 		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
1755 	else
1756 		ifmr->ifm_active |= IFM_UNKNOWN;
1757 
1758 	/* Report flow control status as well */
1759 	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
1760 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1761 	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
1762 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1763 }
1764 
1765 /**
1766  * ice_init_tx_tracking - Initialize Tx queue software tracking values
1767  * @vsi: the VSI to initialize
1768  *
1769  * Initialize Tx queue software tracking values, including the Report Status
1770  * queue, and related software tracking values.
1771  */
1772 static void
1773 ice_init_tx_tracking(struct ice_vsi *vsi)
1774 {
1775 	struct ice_tx_queue *txq;
1776 	size_t j;
1777 	int i;
1778 
1779 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1780 
1781 		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;
1782 
1783 		/* Initialize the last processed descriptor to be the end of
1784 		 * the ring, rather than the start, so that we avoid an
1785 		 * off-by-one error in ice_ift_txd_credits_update for the
1786 		 * first packet.
1787 		 */
1788 		txq->tx_cidx_processed = txq->desc_count - 1;
1789 
1790 		for (j = 0; j < txq->desc_count; j++)
1791 			txq->tx_rsq[j] = QIDX_INVALID;
1792 	}
1793 }
1794 
1795 /**
1796  * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
1797  * @sc: the device softc
1798  *
1799  * Called to update the Rx queue mbuf_sz parameter for configuring the receive
1800  * buffer sizes when programming hardware.
1801  */
1802 static void
1803 ice_update_rx_mbuf_sz(struct ice_softc *sc)
1804 {
1805 	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
1806 	struct ice_vsi *vsi = &sc->pf_vsi;
1807 
1808 	MPASS(mbuf_sz <= UINT16_MAX);
1809 	vsi->mbuf_sz = mbuf_sz;
1810 }
1811 
1812 /**
1813  * ice_if_init - Initialize the device
1814  * @ctx: iflib ctx structure
1815  *
1816  * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
1817  * device filters and prepares the Tx and Rx engines.
1818  *
1819  * @pre assumes the caller holds the iflib CTX lock
1820  */
1821 static void
1822 ice_if_init(if_ctx_t ctx)
1823 {
1824 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1825 	device_t dev = sc->dev;
1826 	int err;
1827 
1828 	ASSERT_CTX_LOCKED(sc);
1829 
1830 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1831 		return;
1832 
1833 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
1834 		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
1835 		return;
1836 	}
1837 
1838 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
1839 		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
1840 		return;
1841 	}
1842 
1843 	ice_update_rx_mbuf_sz(sc);
1844 
1845 	/* Update the MAC address... User might use a LAA */
1846 	err = ice_update_laa_mac(sc);
1847 	if (err) {
1848 		device_printf(dev,
1849 			      "LAA address change failed, err %s\n",
1850 			      ice_err_str(err));
1851 		return;
1852 	}
1853 
1854 	/* Initialize software Tx tracking values */
1855 	ice_init_tx_tracking(&sc->pf_vsi);
1856 
1857 	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
1858 	if (err) {
1859 		device_printf(dev,
1860 			      "Unable to configure the main VSI for Tx: %s\n",
1861 			      ice_err_str(err));
1862 		return;
1863 	}
1864 
1865 	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
1866 	if (err) {
1867 		device_printf(dev,
1868 			      "Unable to configure the main VSI for Rx: %s\n",
1869 			      ice_err_str(err));
1870 		goto err_cleanup_tx;
1871 	}
1872 
1873 	err = ice_control_rx_queues(&sc->pf_vsi, true);
1874 	if (err) {
1875 		device_printf(dev,
1876 			      "Unable to enable Rx rings for transmit: %s\n",
1877 			      ice_err_str(err));
1878 		goto err_cleanup_tx;
1879 	}
1880 
1881 	err = ice_cfg_pf_default_mac_filters(sc);
1882 	if (err) {
1883 		device_printf(dev,
1884 			      "Unable to configure default MAC filters: %s\n",
1885 			      ice_err_str(err));
1886 		goto err_stop_rx;
1887 	}
1888 
1889 	/* We use software interrupts for Tx, so we only program the hardware
1890 	 * interrupts for Rx.
1891 	 */
1892 	ice_configure_rxq_interrupts(&sc->pf_vsi);
1893 	ice_configure_rx_itr(&sc->pf_vsi);
1894 
1895 	/* Configure promiscuous mode */
1896 	ice_if_promisc_set(ctx, if_getflags(sc->ifp));
1897 
1898 	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
1899 	return;
1900 
1901 err_stop_rx:
1902 	ice_control_rx_queues(&sc->pf_vsi, false);
1903 err_cleanup_tx:
1904 	ice_vsi_disable_tx(&sc->pf_vsi);
1905 }
1906 
1907 /**
1908  * ice_poll_for_media_avail - Re-enable link if media is detected
1909  * @sc: device private structure
1910  *
1911  * Intended to be called from the driver's timer function, this function
1912  * sends the Get Link Status AQ command and re-enables HW link if the
1913  * command says that media is available.
1914  *
1915  * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing,
1916  * since media removal events are supposed to be sent to the driver through
1917  * a link status event.
1918  */
1919 static void
1920 ice_poll_for_media_avail(struct ice_softc *sc)
1921 {
1922 	struct ice_hw *hw = &sc->hw;
1923 	struct ice_port_info *pi = hw->port_info;
1924 
1925 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
1926 		pi->phy.get_link_info = true;
1927 		ice_get_link_status(pi, &sc->link_up);
1928 
1929 		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
1930 			enum ice_status status;
1931 
1932 			/* Re-enable link and re-apply user link settings */
1933 			ice_apply_saved_phy_cfg(sc);
1934 
1935 			/* Update the OS about changes in media capability */
1936 			status = ice_add_media_types(sc, sc->media);
1937 			if (status)
1938 				device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
1939 					      ice_status_str(status),
1940 					      ice_aq_str(hw->adminq.sq_last_status));
1941 
1942 			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
1943 		}
1944 	}
1945 }
1946 
1947 /**
1948  * ice_if_timer - called by iflib periodically
1949  * @ctx: iflib ctx structure
1950  * @qid: the queue this timer was called for
1951  *
1952  * This callback is triggered by iflib periodically. We use it to update the
1953  * hw statistics.
1954  *
1955  * @remark this function is not protected by the iflib CTX lock.
1956  */
1957 static void
1958 ice_if_timer(if_ctx_t ctx, uint16_t qid)
1959 {
1960 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1961 	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;
1962 
1963 	if (qid != 0)
1964 		return;
1965 
1966 	/* Do not attempt to update stats when in recovery mode */
1967 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1968 		return;
1969 
1970 	/* Update device statistics */
1971 	ice_update_pf_stats(sc);
1972 
1973 	/*
1974 	 * For proper watchdog management, the iflib stack needs to know if
1975 	 * we've been paused during the last interval. Check if the
1976 	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
1977 	 */
1978 	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
1979 		sc->scctx->isc_pause_frames = 1;
1980 
1981 	/* Update the primary VSI stats */
1982 	ice_update_vsi_hw_stats(&sc->pf_vsi);
1983 }
1984 
1985 /**
1986  * ice_admin_timer - called periodically to trigger the admin task
1987  * @arg: callout(9) argument pointing to the device private softc structure
1988  *
1989  * Timer function used as part of a callout(9) timer that will periodically
1990  * trigger the admin task, even when the interface is down.
1991  *
1992  * @remark this function is not called by iflib and is not protected by the
1993  * iflib CTX lock.
1994  *
1995  * @remark because this is a callout function, it cannot sleep and should not
1996  * attempt taking the iflib CTX lock.
1997  */
1998 static void
1999 ice_admin_timer(void *arg)
2000 {
2001 	struct ice_softc *sc = (struct ice_softc *)arg;
2002 
2003 	/* Fire off the admin task */
2004 	iflib_admin_intr_deferred(sc->ctx);
2005 
2006 	/* Reschedule the admin timer */
2007 	callout_schedule(&sc->admin_timer, hz/2);
2008 }
2009 
2010 /**
2011  * ice_transition_recovery_mode - Transition to recovery mode
2012  * @sc: the device private softc
2013  *
2014  * Called when the driver detects that the firmware has entered recovery mode
2015  * at run time.
2016  */
2017 static void
2018 ice_transition_recovery_mode(struct ice_softc *sc)
2019 {
2020 	struct ice_vsi *vsi = &sc->pf_vsi;
2021 	int i;
2022 
2023 	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
2024 
2025 	/* Tell the stack that the link has gone down */
2026 	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
2027 
2028 	/* Request that the device be re-initialized */
2029 	ice_request_stack_reinit(sc);
2030 
2031 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2032 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2033 
2034 	ice_vsi_del_txqs_ctx(vsi);
2035 	ice_vsi_del_rxqs_ctx(vsi);
2036 
2037 	for (i = 0; i < sc->num_available_vsi; i++) {
2038 		if (sc->all_vsi[i])
2039 			ice_release_vsi(sc->all_vsi[i]);
2040 	}
2041 	sc->num_available_vsi = 0;
2042 
2043 	if (sc->all_vsi) {
2044 		free(sc->all_vsi, M_ICE);
2045 		sc->all_vsi = NULL;
2046 	}
2047 
2048 	/* Destroy the interrupt manager */
2049 	ice_resmgr_destroy(&sc->imgr);
2050 	/* Destroy the queue managers */
2051 	ice_resmgr_destroy(&sc->tx_qmgr);
2052 	ice_resmgr_destroy(&sc->rx_qmgr);
2053 
2054 	ice_deinit_hw(&sc->hw);
2055 }
2056 
2057 /**
2058  * ice_transition_safe_mode - Transition to safe mode
2059  * @sc: the device private softc
2060  *
2061  * Called when the driver attempts to reload the DDP package during a device
2062  * reset, and the new download fails. If so, we must transition to safe mode
2063  * at run time.
2064  *
2065  * @remark although safe mode normally allocates only a single queue, we can't
2066  * change the number of queues dynamically when using iflib. Due to this, we
2067  * do not attempt to reduce the number of queues.
2068  */
2069 static void
2070 ice_transition_safe_mode(struct ice_softc *sc)
2071 {
2072 	/* Indicate that we are in Safe mode */
2073 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
2074 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
2075 
2076 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2077 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2078 
2079 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2080 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
2081 }
2082 
2083 /**
2084  * ice_if_update_admin_status - update admin status
2085  * @ctx: iflib ctx structure
2086  *
2087  * Called by iflib to update the admin status. For our purposes, this means
2088  * check the adminq, and update the link status. It's ultimately triggered by
2089  * our admin interrupt, or by the ice_if_timer periodically.
2090  *
2091  * @pre assumes the caller holds the iflib CTX lock
2092  */
2093 static void
2094 ice_if_update_admin_status(if_ctx_t ctx)
2095 {
2096 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2097 	enum ice_fw_modes fw_mode;
2098 	bool reschedule = false;
2099 	u16 pending = 0;
2100 
2101 	ASSERT_CTX_LOCKED(sc);
2102 
2103 	/* Check if the firmware entered recovery mode at run time */
2104 	fw_mode = ice_get_fw_mode(&sc->hw);
2105 	if (fw_mode == ICE_FW_MODE_REC) {
2106 		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2107 			/* If we just entered recovery mode, log a warning to
2108 			 * the system administrator and deinit driver state
2109 			 * that is no longer functional.
2110 			 */
2111 			ice_transition_recovery_mode(sc);
2112 		}
2113 	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
2114 		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
2115 			/* Rollback mode isn't fatal, but we don't want to
2116 			 * repeatedly post a message about it.
2117 			 */
2118 			ice_print_rollback_msg(&sc->hw);
2119 		}
2120 	}
2121 
2122 	/* Handle global reset events */
2123 	ice_handle_reset_event(sc);
2124 
2125 	/* Handle PF reset requests */
2126 	ice_handle_pf_reset_request(sc);
2127 
2128 	/* Handle MDD events */
2129 	ice_handle_mdd_event(sc);
2130 
2131 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
2132 	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
2133 	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2134 		/*
2135 		 * If we know the control queues are disabled, skip processing
2136 		 * the control queues entirely.
2137 		 */
2138 		;
2139 	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
2140 		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
2141 		if (pending > 0)
2142 			reschedule = true;
2143 
2144 		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
2145 		if (pending > 0)
2146 			reschedule = true;
2147 	}
2148 
2149 	/* Poll for link up */
2150 	ice_poll_for_media_avail(sc);
2151 
2152 	/* Check and update link status */
2153 	ice_update_link_status(sc, false);
2154 
2155 	/*
2156 	 * If there are still messages to process, we need to reschedule
2157 	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
2158 	 * woken up at the next interrupt or timer event.
2159 	 */
2160 	if (reschedule) {
2161 		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
2162 		iflib_admin_intr_deferred(ctx);
2163 	} else {
2164 		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2165 	}
2166 }
2167 
2168 /**
2169  * ice_prepare_for_reset - Prepare device for an impending reset
2170  * @sc: The device private softc
2171  *
2172  * Prepare the driver for an impending reset, shutting down VSIs, clearing the
2173  * scheduler setup, and shutting down controlqs. Uses the
2174  * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
2175  * driver for reset or not.
2176  */
2177 static void
2178 ice_prepare_for_reset(struct ice_softc *sc)
2179 {
2180 	struct ice_hw *hw = &sc->hw;
2181 
2182 	/* If we're already prepared, there's nothing to do */
2183 	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
2184 		return;
2185 
2186 	log(LOG_INFO, "%s: preparing to reset device logic\n", sc->ifp->if_xname);
2187 
2188 	/* In recovery mode, hardware is not initialized */
2189 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2190 		return;
2191 
2192 	/* Release the main PF VSI queue mappings */
2193 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2194 				    sc->pf_vsi.num_tx_queues);
2195 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2196 				    sc->pf_vsi.num_rx_queues);
2197 
2198 	ice_clear_hw_tbls(hw);
2199 
2200 	if (hw->port_info)
2201 		ice_sched_clear_port(hw->port_info);
2202 
2203 	ice_shutdown_all_ctrlq(hw);
2204 }
2205 
2206 /**
2207  * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
2208  * @sc: the device softc pointer
2209  *
2210  * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
2211  * mapping after a reset occurred.
2212  */
2213 static int
2214 ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
2215 {
2216 	struct ice_vsi *vsi = &sc->pf_vsi;
2217 	struct ice_tx_queue *txq;
2218 	struct ice_rx_queue *rxq;
2219 	int err, i;
2220 
2221 	/* Re-assign Tx queues from PF space to the main VSI */
2222 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
2223 					    vsi->num_tx_queues);
2224 	if (err) {
2225 		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
2226 			      ice_err_str(err));
2227 		return (err);
2228 	}
2229 
2230 	/* Re-assign Rx queues from PF space to this VSI */
2231 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
2232 					    vsi->num_rx_queues);
2233 	if (err) {
2234 		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
2235 			      ice_err_str(err));
2236 		goto err_release_tx_queues;
2237 	}
2238 
2239 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
2240 
2241 	/* Re-assign Tx queue tail pointers */
2242 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
2243 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
2244 
2245 	/* Re-assign Rx queue tail pointers */
2246 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
2247 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
2248 
2249 	return (0);
2250 
2251 err_release_tx_queues:
2252 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2253 				   sc->pf_vsi.num_tx_queues);
2254 
2255 	return (err);
2256 }
2257 
/*
 * Evaluates to non-zero when the ifnet behind the given iflib context has
 * IFF_DRV_RUNNING set in its driver flags, i.e. the context is active.
 */
#define CTX_ACTIVE(ctx) \
	(if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
2260 
2261 /**
2262  * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
2263  * @sc: The device private softc
2264  *
2265  * Handle a driver rebuild while in recovery mode. This will only rebuild the
2266  * limited functionality supported while in recovery mode.
2267  */
2268 static void
2269 ice_rebuild_recovery_mode(struct ice_softc *sc)
2270 {
2271 	device_t dev = sc->dev;
2272 
2273 	/* enable PCIe bus master */
2274 	pci_enable_busmaster(dev);
2275 
2276 	/* Configure interrupt causes for the administrative interrupt */
2277 	ice_configure_misc_interrupts(sc);
2278 
2279 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2280 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2281 
2282 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2283 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2284 
2285 	log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname);
2286 
2287 	/* In order to completely restore device functionality, the iflib core
2288 	 * needs to be reset. We need to request an iflib reset. Additionally,
2289 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2290 	 * the iflib core, we also want re-run the admin task so that iflib
2291 	 * resets immediately instead of waiting for the next interrupt.
2292 	 */
2293 	ice_request_stack_reinit(sc);
2294 
2295 	return;
2296 }
2297 
2298 /**
2299  * ice_rebuild - Rebuild driver state post reset
2300  * @sc: The device private softc
2301  *
2302  * Restore driver state after a reset occurred. Restart the controlqs, setup
2303  * the hardware port, and re-enable the VSIs.
2304  */
2305 static void
2306 ice_rebuild(struct ice_softc *sc)
2307 {
2308 	struct ice_hw *hw = &sc->hw;
2309 	device_t dev = sc->dev;
2310 	enum ice_status status;
2311 	int err;
2312 
2313 	sc->rebuild_ticks = ticks;
2314 
2315 	/* If we're rebuilding, then a reset has succeeded. */
2316 	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);
2317 
2318 	/*
2319 	 * If the firmware is in recovery mode, only restore the limited
2320 	 * functionality supported by recovery mode.
2321 	 */
2322 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2323 		ice_rebuild_recovery_mode(sc);
2324 		return;
2325 	}
2326 
2327 	/* enable PCIe bus master */
2328 	pci_enable_busmaster(dev);
2329 
2330 	status = ice_init_all_ctrlq(hw);
2331 	if (status) {
2332 		device_printf(dev, "failed to re-init controlqs, err %s\n",
2333 			      ice_status_str(status));
2334 		goto err_shutdown_ctrlq;
2335 	}
2336 
2337 	/* Query the allocated resources for Tx scheduler */
2338 	status = ice_sched_query_res_alloc(hw);
2339 	if (status) {
2340 		device_printf(dev,
2341 			      "Failed to query scheduler resources, err %s aq_err %s\n",
2342 			      ice_status_str(status),
2343 			      ice_aq_str(hw->adminq.sq_last_status));
2344 		goto err_shutdown_ctrlq;
2345 	}
2346 
2347 	err = ice_send_version(sc);
2348 	if (err)
2349 		goto err_shutdown_ctrlq;
2350 
2351 	err = ice_init_link_events(sc);
2352 	if (err) {
2353 		device_printf(dev, "ice_init_link_events failed: %s\n",
2354 			      ice_err_str(err));
2355 		goto err_shutdown_ctrlq;
2356 	}
2357 
2358 	status = ice_clear_pf_cfg(hw);
2359 	if (status) {
2360 		device_printf(dev, "failed to clear PF configuration, err %s\n",
2361 			      ice_status_str(status));
2362 		goto err_shutdown_ctrlq;
2363 	}
2364 
2365 	ice_clear_pxe_mode(hw);
2366 
2367 	status = ice_get_caps(hw);
2368 	if (status) {
2369 		device_printf(dev, "failed to get capabilities, err %s\n",
2370 			      ice_status_str(status));
2371 		goto err_shutdown_ctrlq;
2372 	}
2373 
2374 	status = ice_sched_init_port(hw->port_info);
2375 	if (status) {
2376 		device_printf(dev, "failed to initialize port, err %s\n",
2377 			      ice_status_str(status));
2378 		goto err_sched_cleanup;
2379 	}
2380 
2381 	/* If we previously loaded the package, it needs to be reloaded now */
2382 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
2383 		status = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
2384 		if (status) {
2385 			ice_log_pkg_init(sc, &status);
2386 
2387 			ice_transition_safe_mode(sc);
2388 		}
2389 	}
2390 
2391 	ice_reset_pf_stats(sc);
2392 
2393 	err = ice_rebuild_pf_vsi_qmap(sc);
2394 	if (err) {
2395 		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
2396 			      ice_err_str(err));
2397 		goto err_sched_cleanup;
2398 	}
2399 	err = ice_initialize_vsi(&sc->pf_vsi);
2400 	if (err) {
2401 		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
2402 			      ice_err_str(err));
2403 		goto err_release_queue_allocations;
2404 	}
2405 
2406 	/* Replay all VSI configuration */
2407 	err = ice_replay_all_vsi_cfg(sc);
2408 	if (err)
2409 		goto err_deinit_pf_vsi;
2410 
2411 	/* Reconfigure the main PF VSI for RSS */
2412 	err = ice_config_rss(&sc->pf_vsi);
2413 	if (err) {
2414 		device_printf(sc->dev,
2415 			      "Unable to reconfigure RSS for the main VSI, err %s\n",
2416 			      ice_err_str(err));
2417 		goto err_deinit_pf_vsi;
2418 	}
2419 
2420 	/* Refresh link status */
2421 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2422 	sc->hw.port_info->phy.get_link_info = true;
2423 	ice_get_link_status(sc->hw.port_info, &sc->link_up);
2424 	ice_update_link_status(sc, true);
2425 
2426 	/* Configure interrupt causes for the administrative interrupt */
2427 	ice_configure_misc_interrupts(sc);
2428 
2429 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2430 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2431 
2432 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2433 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2434 
2435 	log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname);
2436 
2437 	/* In order to completely restore device functionality, the iflib core
2438 	 * needs to be reset. We need to request an iflib reset. Additionally,
2439 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2440 	 * the iflib core, we also want re-run the admin task so that iflib
2441 	 * resets immediately instead of waiting for the next interrupt.
2442 	 */
2443 	ice_request_stack_reinit(sc);
2444 
2445 	return;
2446 
2447 err_deinit_pf_vsi:
2448 	ice_deinit_vsi(&sc->pf_vsi);
2449 err_release_queue_allocations:
2450 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2451 				    sc->pf_vsi.num_tx_queues);
2452 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2453 				    sc->pf_vsi.num_rx_queues);
2454 err_sched_cleanup:
2455 	ice_sched_cleanup_all(hw);
2456 err_shutdown_ctrlq:
2457 	ice_shutdown_all_ctrlq(hw);
2458 	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2459 	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
2460 }
2461 
2462 /**
2463  * ice_handle_reset_event - Handle reset events triggered by OICR
2464  * @sc: The device private softc
2465  *
2466  * Handle reset events triggered by an OICR notification. This includes CORER,
2467  * GLOBR, and EMPR resets triggered by software on this or any other PF or by
2468  * firmware.
2469  *
2470  * @pre assumes the iflib context lock is held, and will unlock it while
2471  * waiting for the hardware to finish reset.
2472  */
2473 static void
2474 ice_handle_reset_event(struct ice_softc *sc)
2475 {
2476 	struct ice_hw *hw = &sc->hw;
2477 	enum ice_status status;
2478 	device_t dev = sc->dev;
2479 
2480 	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
2481 	 * trigger an OICR interrupt. Our OICR handler will determine when
2482 	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
2483 	 * appropriate.
2484 	 */
2485 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
2486 		return;
2487 
2488 	ice_prepare_for_reset(sc);
2489 
2490 	/*
2491 	 * Release the iflib context lock and wait for the device to finish
2492 	 * resetting.
2493 	 */
2494 	IFLIB_CTX_UNLOCK(sc);
2495 	status = ice_check_reset(hw);
2496 	IFLIB_CTX_LOCK(sc);
2497 	if (status) {
2498 		device_printf(dev, "Device never came out of reset, err %s\n",
2499 			      ice_status_str(status));
2500 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2501 		return;
2502 	}
2503 
2504 	/* We're done with the reset, so we can rebuild driver state */
2505 	sc->hw.reset_ongoing = false;
2506 	ice_rebuild(sc);
2507 
2508 	/* In the unlikely event that a PF reset request occurs at the same
2509 	 * time as a global reset, clear the request now. This avoids
2510 	 * resetting a second time right after we reset due to a global event.
2511 	 */
2512 	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2513 		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
2514 }
2515 
2516 /**
2517  * ice_handle_pf_reset_request - Initiate PF reset requested by software
2518  * @sc: The device private softc
2519  *
2520  * Initiate a PF reset requested by software. We handle this in the admin task
2521  * so that only one thread actually handles driver preparation and cleanup,
2522  * rather than having multiple threads possibly attempt to run this code
2523  * simultaneously.
2524  *
2525  * @pre assumes the iflib context lock is held and will unlock it while
2526  * waiting for the PF reset to complete.
2527  */
2528 static void
2529 ice_handle_pf_reset_request(struct ice_softc *sc)
2530 {
2531 	struct ice_hw *hw = &sc->hw;
2532 	enum ice_status status;
2533 
2534 	/* Check for PF reset requests */
2535 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2536 		return;
2537 
2538 	/* Make sure we're prepared for reset */
2539 	ice_prepare_for_reset(sc);
2540 
2541 	/*
2542 	 * Release the iflib context lock and wait for the device to finish
2543 	 * resetting.
2544 	 */
2545 	IFLIB_CTX_UNLOCK(sc);
2546 	status = ice_reset(hw, ICE_RESET_PFR);
2547 	IFLIB_CTX_LOCK(sc);
2548 	if (status) {
2549 		device_printf(sc->dev, "device PF reset failed, err %s\n",
2550 			      ice_status_str(status));
2551 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2552 		return;
2553 	}
2554 
2555 	sc->soft_stats.pfr_count++;
2556 	ice_rebuild(sc);
2557 }
2558 
2559 /**
2560  * ice_init_device_features - Init device driver features
2561  * @sc: driver softc structure
2562  *
2563  * @pre assumes that the function capabilities bits have been set up by
2564  * ice_init_hw().
2565  */
2566 static void
2567 ice_init_device_features(struct ice_softc *sc)
2568 {
2569 	/*
2570 	 * A failed pkg file download triggers safe mode, disabling advanced
2571 	 * device feature support
2572 	 */
2573 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE))
2574 		return;
2575 
2576 	/* Set capabilities that the driver supports */
2577 	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2578 	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
2579 	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
2580 	ice_set_bit(ICE_FEATURE_DEFAULT_OVERRIDE, sc->feat_cap);
2581 
2582 	/* Disable features due to hardware limitations... */
2583 	if (!sc->hw.func_caps.common_cap.rss_table_size)
2584 		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2585 
2586 	/* Disable capabilities not supported by the OS */
2587 	ice_disable_unsupported_features(sc->feat_cap);
2588 
2589 	/* RSS is always enabled for iflib */
2590 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
2591 		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
2592 }
2593 
2594 /**
2595  * ice_if_multi_set - Callback to update Multicast filters in HW
2596  * @ctx: iflib ctx structure
2597  *
2598  * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
2599  * the if_multiaddrs list and determine which filters have been added or
2600  * removed from the list, and update HW programming to reflect the new list.
2601  *
2602  * @pre assumes the caller holds the iflib CTX lock
2603  */
2604 static void
2605 ice_if_multi_set(if_ctx_t ctx)
2606 {
2607 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2608 	int err;
2609 
2610 	ASSERT_CTX_LOCKED(sc);
2611 
2612 	/* Do not handle multicast configuration in recovery mode */
2613 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2614 		return;
2615 
2616 	err = ice_sync_multicast_filters(sc);
2617 	if (err) {
2618 		device_printf(sc->dev,
2619 			      "Failed to synchronize multicast filter list: %s\n",
2620 			      ice_err_str(err));
2621 		return;
2622 	}
2623 }
2624 
2625 /**
2626  * ice_if_vlan_register - Register a VLAN with the hardware
2627  * @ctx: iflib ctx pointer
2628  * @vtag: VLAN to add
2629  *
2630  * Programs the main PF VSI with a hardware filter for the given VLAN.
2631  *
2632  * @pre assumes the caller holds the iflib CTX lock
2633  */
2634 static void
2635 ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
2636 {
2637 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2638 	enum ice_status status;
2639 
2640 	ASSERT_CTX_LOCKED(sc);
2641 
2642 	/* Do not handle VLAN configuration in recovery mode */
2643 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2644 		return;
2645 
2646 	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
2647 	if (status) {
2648 		device_printf(sc->dev,
2649 			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
2650 			      vtag, ice_status_str(status),
2651 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2652 	}
2653 }
2654 
2655 /**
2656  * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
2657  * @ctx: iflib ctx pointer
2658  * @vtag: VLAN to add
2659  *
2660  * Removes the previously programmed VLAN filter from the main PF VSI.
2661  *
2662  * @pre assumes the caller holds the iflib CTX lock
2663  */
2664 static void
2665 ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
2666 {
2667 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2668 	enum ice_status status;
2669 
2670 	ASSERT_CTX_LOCKED(sc);
2671 
2672 	/* Do not handle VLAN configuration in recovery mode */
2673 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2674 		return;
2675 
2676 	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
2677 	if (status) {
2678 		device_printf(sc->dev,
2679 			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
2680 			      vtag, ice_status_str(status),
2681 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2682 	}
2683 }
2684 
2685 /**
2686  * ice_if_stop - Stop the device
2687  * @ctx: iflib context structure
2688  *
2689  * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
2690  * down)
2691  *
2692  * @pre assumes the caller holds the iflib CTX lock
2693  */
2694 static void
2695 ice_if_stop(if_ctx_t ctx)
2696 {
2697 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2698 
2699 	ASSERT_CTX_LOCKED(sc);
2700 
2701 	/*
2702 	 * The iflib core may call IFDI_STOP prior to the first call to
2703 	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
2704 	 * don't have, and disable Tx queues which aren't yet configured.
2705 	 * Although it is likely these extra operations are harmless, they do
2706 	 * cause spurious warning messages to be displayed, which may confuse
2707 	 * users.
2708 	 *
2709 	 * To avoid these messages, we use a state bit indicating if we've
2710 	 * been initialized. It will be set when ice_if_init is called, and
2711 	 * cleared here in ice_if_stop.
2712 	 */
2713 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
2714 		return;
2715 
2716 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
2717 		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
2718 		return;
2719 	}
2720 
2721 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
2722 		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
2723 		return;
2724 	}
2725 
2726 	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
2727 	 * return of these functions because there's nothing we can really do
2728 	 * if they fail, and the functions already print error messages.
2729 	 * Just try to shut down as much as we can.
2730 	 */
2731 	ice_rm_pf_default_mac_filters(sc);
2732 
2733 	/* Dissociate the Tx and Rx queues from the interrupts */
2734 	ice_flush_txq_interrupts(&sc->pf_vsi);
2735 	ice_flush_rxq_interrupts(&sc->pf_vsi);
2736 
2737 	/* Disable the Tx and Rx queues */
2738 	ice_vsi_disable_tx(&sc->pf_vsi);
2739 	ice_control_rx_queues(&sc->pf_vsi, false);
2740 }
2741 
2742 /**
2743  * ice_if_get_counter - Get current value of an ifnet statistic
2744  * @ctx: iflib context pointer
2745  * @counter: ifnet counter to read
2746  *
2747  * Reads the current value of an ifnet counter for the device.
2748  *
2749  * This function is not protected by the iflib CTX lock.
2750  */
2751 static uint64_t
2752 ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
2753 {
2754 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2755 
2756 	/* Return the counter for the main PF VSI */
2757 	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
2758 }
2759 
2760 /**
2761  * ice_request_stack_reinit - Request that iflib re-initialize
2762  * @sc: the device private softc
2763  *
2764  * Request that the device be brought down and up, to re-initialize. For
2765  * example, this may be called when a device reset occurs, or when Tx and Rx
2766  * queues need to be re-initialized.
2767  *
2768  * This is required because the iflib state is outside the driver, and must be
2769  * re-initialized if we need to resart Tx and Rx queues.
2770  */
2771 void
2772 ice_request_stack_reinit(struct ice_softc *sc)
2773 {
2774 	if (CTX_ACTIVE(sc->ctx)) {
2775 		iflib_request_reset(sc->ctx);
2776 		iflib_admin_intr_deferred(sc->ctx);
2777 	}
2778 }
2779 
2780 /**
2781  * ice_driver_is_detaching - Check if the driver is detaching/unloading
2782  * @sc: device private softc
2783  *
2784  * Returns true if the driver is detaching, false otherwise.
2785  *
2786  * @remark on newer kernels, take advantage of iflib_in_detach in order to
2787  * report detachment correctly as early as possible.
2788  *
2789  * @remark this function is used by various code paths that want to avoid
2790  * running if the driver is about to be removed. This includes sysctls and
2791  * other driver access points. Note that it does not fully resolve
2792  * detach-based race conditions as it is possible for a thread to race with
2793  * iflib_in_detach.
2794  */
2795 bool
2796 ice_driver_is_detaching(struct ice_softc *sc)
2797 {
2798 	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
2799 		iflib_in_detach(sc->ctx));
2800 }
2801 
2802 /**
2803  * ice_if_priv_ioctl - Device private ioctl handler
2804  * @ctx: iflib context pointer
2805  * @command: The ioctl command issued
2806  * @data: ioctl specific data
2807  *
2808  * iflib callback for handling custom driver specific ioctls.
2809  *
2810  * @pre Assumes that the iflib context lock is held.
2811  */
2812 static int
2813 ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
2814 {
2815 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2816 	struct ifdrv *ifd;
2817 	device_t dev = sc->dev;
2818 
2819 	if (data == NULL)
2820 		return (EINVAL);
2821 
2822 	ASSERT_CTX_LOCKED(sc);
2823 
2824 	/* Make sure the command type is valid */
2825 	switch (command) {
2826 	case SIOCSDRVSPEC:
2827 	case SIOCGDRVSPEC:
2828 		/* Accepted commands */
2829 		break;
2830 	case SIOCGPRIVATE_0:
2831 		/*
2832 		 * Although we do not support this ioctl command, it's
2833 		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
2834 		 * handler. Do not print a message in this case
2835 		 */
2836 		return (ENOTSUP);
2837 	default:
2838 		/*
2839 		 * If we get a different command for this function, it's
2840 		 * definitely unexpected, so log a message indicating what
2841 		 * command we got for debugging purposes.
2842 		 */
2843 		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
2844 			      __func__, command);
2845 		return (EINVAL);
2846 	}
2847 
2848 	ifd = (struct ifdrv *)data;
2849 
2850 	switch (ifd->ifd_cmd) {
2851 	case ICE_NVM_ACCESS:
2852 		return ice_handle_nvm_access_ioctl(sc, ifd);
2853 	default:
2854 		return EINVAL;
2855 	}
2856 }
2857 
2858 /**
2859  * ice_if_i2c_req - I2C request handler for iflib
2860  * @ctx: iflib context pointer
2861  * @req: The I2C parameters to use
2862  *
2863  * Read from the port's I2C eeprom using the parameters from the ioctl.
2864  *
2865  * @remark The iflib-only part is pretty simple.
2866  */
2867 static int
2868 ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
2869 {
2870 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2871 
2872 	return ice_handle_i2c_req(sc, req);
2873 }
2874 
2875