xref: /freebsd/sys/dev/ice/if_ice_iflib.c (revision e63d20b70ee1dbee9b075f29de6f30cdcfe1abe1)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2024, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file if_ice_iflib.c
34  * @brief iflib driver implementation
35  *
36  * Contains the main entry point for the iflib driver implementation. It
37  * implements the various ifdi driver methods, and sets up the module and
38  * driver values to load an iflib driver.
39  */
40 
41 #include "ice_iflib.h"
42 #include "ice_drv_info.h"
43 #include "ice_switch.h"
44 #include "ice_sched.h"
45 
46 #include <sys/module.h>
47 #include <sys/sockio.h>
48 #include <sys/smp.h>
49 #include <dev/pci/pcivar.h>
50 #include <dev/pci/pcireg.h>
51 
52 /*
53  * Device method prototypes
54  */
55 
56 static void *ice_register(device_t);
57 static int  ice_if_attach_pre(if_ctx_t);
58 static int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
59 static int  ice_if_attach_post(if_ctx_t);
60 static void ice_attach_post_recovery_mode(struct ice_softc *sc);
61 static int  ice_if_detach(if_ctx_t);
62 static int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
63 static int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets);
64 static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
65 static void ice_if_queues_free(if_ctx_t ctx);
66 static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
67 static void ice_if_intr_enable(if_ctx_t ctx);
68 static void ice_if_intr_disable(if_ctx_t ctx);
69 static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
70 static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
71 static int ice_if_promisc_set(if_ctx_t ctx, int flags);
72 static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
73 static int ice_if_media_change(if_ctx_t ctx);
74 static void ice_if_init(if_ctx_t ctx);
75 static void ice_if_timer(if_ctx_t ctx, uint16_t qid);
76 static void ice_if_update_admin_status(if_ctx_t ctx);
77 static void ice_if_multi_set(if_ctx_t ctx);
78 static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
79 static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
80 static void ice_if_stop(if_ctx_t ctx);
81 static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
82 static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
83 static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
84 static int ice_if_suspend(if_ctx_t ctx);
85 static int ice_if_resume(if_ctx_t ctx);
86 static bool ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event);
87 static int ice_setup_mirror_vsi(struct ice_mirr_if *mif);
88 static int ice_wire_mirror_intrs(struct ice_mirr_if *mif);
89 static void ice_free_irqvs_subif(struct ice_mirr_if *mif);
90 static void *ice_subif_register(device_t);
91 static void ice_subif_setup_scctx(struct ice_mirr_if *mif);
92 static int ice_subif_rebuild(struct ice_softc *sc);
93 static int ice_subif_rebuild_vsi_qmap(struct ice_softc *sc);
94 
95 /* Iflib API */
96 static int ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
97     uint64_t *paddrs, int ntxqs, int ntxqsets);
98 static int ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
99     uint64_t *paddrs, int nrxqs, int nrxqsets);
100 static int ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
101 static int ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
102 static void ice_subif_if_intr_enable(if_ctx_t ctx);
103 static int ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix);
104 static void ice_subif_if_init(if_ctx_t ctx);
105 static void ice_subif_if_stop(if_ctx_t ctx);
106 static void ice_subif_if_queues_free(if_ctx_t ctx);
107 static int ice_subif_if_attach_pre(if_ctx_t);
108 static int ice_subif_if_attach_post(if_ctx_t);
109 static void ice_subif_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
110 static int ice_subif_if_promisc_set(if_ctx_t ctx, int flags);
111 
112 static int ice_msix_que(void *arg);
113 static int ice_msix_admin(void *arg);
114 
115 /*
116  * Helper function prototypes
117  */
118 static int ice_pci_mapping(struct ice_softc *sc);
119 static void ice_free_pci_mapping(struct ice_softc *sc);
120 static void ice_update_link_status(struct ice_softc *sc, bool update_media);
121 static void ice_init_device_features(struct ice_softc *sc);
122 static void ice_init_tx_tracking(struct ice_vsi *vsi);
123 static void ice_handle_reset_event(struct ice_softc *sc);
124 static void ice_handle_pf_reset_request(struct ice_softc *sc);
125 static void ice_prepare_for_reset(struct ice_softc *sc);
126 static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
127 static void ice_rebuild(struct ice_softc *sc);
128 static void ice_rebuild_recovery_mode(struct ice_softc *sc);
129 static void ice_free_irqvs(struct ice_softc *sc);
130 static void ice_update_rx_mbuf_sz(struct ice_softc *sc);
131 static void ice_poll_for_media_avail(struct ice_softc *sc);
132 static void ice_setup_scctx(struct ice_softc *sc);
133 static int ice_allocate_msix(struct ice_softc *sc);
134 static void ice_admin_timer(void *arg);
135 static void ice_transition_recovery_mode(struct ice_softc *sc);
136 static void ice_transition_safe_mode(struct ice_softc *sc);
137 static void ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask);
138 
139 /*
140  * Device Interface Declaration
141  */
142 
143 /**
144  * @var ice_methods
145  * @brief ice driver method entry points
146  *
147  * List of device methods implementing the generic device interface used by
148  * the device stack to interact with the ice driver. Since this is an iflib
149  * driver, most of the methods point to the generic iflib implementation.
150  */
151 static device_method_t ice_methods[] = {
152 	/* Device interface */
153 	DEVMETHOD(device_register, ice_register),
154 	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
155 	DEVMETHOD(device_attach,   iflib_device_attach),
156 	DEVMETHOD(device_detach,   iflib_device_detach),
157 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
158 	DEVMETHOD(device_suspend,  iflib_device_suspend),
159 	DEVMETHOD(device_resume,   iflib_device_resume),
160 	DEVMETHOD_END
161 };
162 
163 /**
164  * @var ice_iflib_methods
165  * @brief iflib method entry points
166  *
167  * List of device methods used by the iflib stack to interact with this
168  * driver. These are the real main entry points used to interact with this
169  * driver.
170  */
171 static device_method_t ice_iflib_methods[] = {
172 	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
173 	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
174 	DEVMETHOD(ifdi_detach, ice_if_detach),
175 	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
176 	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
177 	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
178 	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
179 	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
180 	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
181 	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
182 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
183 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
184 	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
185 	DEVMETHOD(ifdi_media_status, ice_if_media_status),
186 	DEVMETHOD(ifdi_media_change, ice_if_media_change),
187 	DEVMETHOD(ifdi_init, ice_if_init),
188 	DEVMETHOD(ifdi_stop, ice_if_stop),
189 	DEVMETHOD(ifdi_timer, ice_if_timer),
190 	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
191 	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
192 	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
193 	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
194 	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
195 	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
196 	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
197 	DEVMETHOD(ifdi_suspend, ice_if_suspend),
198 	DEVMETHOD(ifdi_resume, ice_if_resume),
199 	DEVMETHOD(ifdi_needs_restart, ice_if_needs_restart),
200 	DEVMETHOD_END
201 };
202 
203 /**
204  * @var ice_driver
205  * @brief driver structure for the generic device stack
206  *
207  * driver_t definition used to setup the generic device methods.
208  */
209 static driver_t ice_driver = {
210 	.name = "ice",
211 	.methods = ice_methods,
212 	.size = sizeof(struct ice_softc),
213 };
214 
215 /**
216  * @var ice_iflib_driver
217  * @brief driver structure for the iflib stack
218  *
219  * driver_t definition used to setup the iflib device methods.
220  */
221 static driver_t ice_iflib_driver = {
222 	.name = "ice",
223 	.methods = ice_iflib_methods,
224 	.size = sizeof(struct ice_softc),
225 };
226 
/* Tx/Rx operation tables defined outside this file */
extern struct if_txrx ice_txrx, ice_recovery_txrx;
229 
230 /**
231  * @var ice_sctx
232  * @brief ice driver shared context
233  *
234  * Structure defining shared values (context) that is used by all instances of
235  * the device. Primarily used to setup details about how the iflib stack
236  * should treat this driver. Also defines the default, minimum, and maximum
237  * number of descriptors in each ring.
238  */
239 static struct if_shared_ctx ice_sctx = {
240 	.isc_magic = IFLIB_MAGIC,
241 	.isc_q_align = PAGE_SIZE,
242 
243 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
244 	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
245 	 * that doesn't make sense since that would be larger than the maximum
246 	 * size of a single packet.
247 	 */
248 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
249 
250 	/* XXX: This is only used by iflib to ensure that
251 	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
252 	 */
253 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
254 	/* XXX: This is used by iflib to set the number of segments in the TSO
255 	 * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the
256 	 * related ifnet parameter.
257 	 */
258 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
259 
260 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
261 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
262 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
263 
264 	.isc_nfl = 1,
265 	.isc_ntxqs = 1,
266 	.isc_nrxqs = 1,
267 
268 	.isc_admin_intrcnt = 1,
269 	.isc_vendor_info = ice_vendor_info_array,
270 	.isc_driver_version = __DECONST(char *, ice_driver_version),
271 	.isc_driver = &ice_iflib_driver,
272 
273 	/*
274 	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
275 	 * for hardware checksum offload
276 	 *
277 	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
278 	 * IP sum field, required by our hardware to calculate valid TSO
279 	 * checksums.
280 	 *
281 	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
282 	 * even when the interface is down.
283 	 *
284 	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
285 	 * vectors manually instead of relying on iflib code to do this.
286 	 */
287 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
288 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,
289 
290 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
291 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
292 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
293 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
294 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
295 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
296 };
297 
298 DRIVER_MODULE(ice, pci, ice_driver, ice_module_event_handler, NULL);
299 
300 MODULE_VERSION(ice, 1);
301 MODULE_DEPEND(ice, pci, 1, 1, 1);
302 MODULE_DEPEND(ice, ether, 1, 1, 1);
303 MODULE_DEPEND(ice, iflib, 1, 1, 1);
304 
305 IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);
306 
307 /* Static driver-wide sysctls */
308 #include "ice_iflib_sysctls.h"
309 
310 /**
311  * ice_pci_mapping - Map PCI BAR memory
312  * @sc: device private softc
313  *
314  * Map PCI BAR 0 for device operation.
315  */
316 static int
317 ice_pci_mapping(struct ice_softc *sc)
318 {
319 	int rc;
320 
321 	/* Map BAR0 */
322 	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
323 	if (rc)
324 		return rc;
325 
326 	return 0;
327 }
328 
329 /**
330  * ice_free_pci_mapping - Release PCI BAR memory
331  * @sc: device private softc
332  *
333  * Release PCI BARs which were previously mapped by ice_pci_mapping().
334  */
335 static void
336 ice_free_pci_mapping(struct ice_softc *sc)
337 {
338 	/* Free BAR0 */
339 	ice_free_bar(sc->dev, &sc->bar0);
340 }
341 
342 /*
343  * Device methods
344  */
345 
346 /**
347  * ice_register - register device method callback
348  * @dev: the device being registered
349  *
350  * Returns a pointer to the shared context structure, which is used by iflib.
351  */
352 static void *
353 ice_register(device_t dev __unused)
354 {
355 	return &ice_sctx;
356 } /* ice_register */
357 
358 /**
359  * ice_setup_scctx - Setup the iflib softc context structure
360  * @sc: the device private structure
361  *
362  * Setup the parameters in if_softc_ctx_t structure used by the iflib stack
363  * when loading.
364  */
365 static void
366 ice_setup_scctx(struct ice_softc *sc)
367 {
368 	if_softc_ctx_t scctx = sc->scctx;
369 	struct ice_hw *hw = &sc->hw;
370 	device_t dev = sc->dev;
371 	bool safe_mode, recovery_mode;
372 
373 	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
374 	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);
375 
376 	/*
377 	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
378 	 * a single queue pair.
379 	 */
380 	if (safe_mode || recovery_mode) {
381 		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
382 		scctx->isc_ntxqsets_max = 1;
383 		scctx->isc_nrxqsets_max = 1;
384 	} else {
385 		/*
386 		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
387 		 * the values of the override sysctls. Cache these initial
388 		 * values so that the driver can be aware of what the iflib
389 		 * sysctl value is when setting up MSI-X vectors.
390 		 */
391 		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
392 		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;
393 
394 		if (scctx->isc_ntxqsets == 0)
395 			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
396 		if (scctx->isc_nrxqsets == 0)
397 			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;
398 
399 		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
400 		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;
401 
402 		/*
403 		 * Sanity check that the iflib sysctl values are within the
404 		 * maximum supported range.
405 		 */
406 		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
407 			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
408 		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
409 			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
410 	}
411 
412 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
413 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
414 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
415 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
416 
417 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
418 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
419 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
420 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
421 
422 	scctx->isc_msix_bar = pci_msix_table_bar(dev);
423 	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;
424 
425 	/*
426 	 * If the driver loads in recovery mode, disable Tx/Rx functionality
427 	 */
428 	if (recovery_mode)
429 		scctx->isc_txrx = &ice_recovery_txrx;
430 	else
431 		scctx->isc_txrx = &ice_txrx;
432 
433 	/*
434 	 * If the driver loads in Safe mode or Recovery mode, disable
435 	 * advanced features including hardware offloads.
436 	 */
437 	if (safe_mode || recovery_mode) {
438 		scctx->isc_capenable = ICE_SAFE_CAPS;
439 		scctx->isc_tx_csum_flags = 0;
440 	} else {
441 		scctx->isc_capenable = ICE_FULL_CAPS;
442 		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
443 	}
444 
445 	scctx->isc_capabilities = scctx->isc_capenable;
446 } /* ice_setup_scctx */
447 
448 /**
449  * ice_if_attach_pre - Early device attach logic
450  * @ctx: the iflib context structure
451  *
452  * Called by iflib during the attach process. Earliest main driver entry
453  * point which performs necessary hardware and driver initialization. Called
454  * before the Tx and Rx queues are allocated.
455  */
456 static int
457 ice_if_attach_pre(if_ctx_t ctx)
458 {
459 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
460 	enum ice_fw_modes fw_mode;
461 	enum ice_status status;
462 	if_softc_ctx_t scctx;
463 	struct ice_hw *hw;
464 	device_t dev;
465 	int err;
466 
467 	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");
468 
469 	ice_set_state(&sc->state, ICE_STATE_ATTACHING);
470 
471 	sc->ctx = ctx;
472 	sc->media = iflib_get_media(ctx);
473 	sc->sctx = iflib_get_sctx(ctx);
474 	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);
475 
476 	dev = sc->dev = iflib_get_dev(ctx);
477 	scctx = sc->scctx = iflib_get_softc_ctx(ctx);
478 
479 	hw = &sc->hw;
480 	hw->back = sc;
481 
482 	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
483 		 "%s:admin", device_get_nameunit(dev));
484 	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
485 	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);
486 
487 	ASSERT_CTX_LOCKED(sc);
488 
489 	if (ice_pci_mapping(sc)) {
490 		err = (ENXIO);
491 		goto destroy_admin_timer;
492 	}
493 
494 	/* Save off the PCI information */
495 	ice_save_pci_info(hw, dev);
496 
497 	/* create tunables as early as possible */
498 	ice_add_device_tunables(sc);
499 
500 	/* Setup ControlQ lengths */
501 	ice_set_ctrlq_len(hw);
502 
503 reinit_hw:
504 
505 	fw_mode = ice_get_fw_mode(hw);
506 	if (fw_mode == ICE_FW_MODE_REC) {
507 		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
508 
509 		err = ice_attach_pre_recovery_mode(sc);
510 		if (err)
511 			goto free_pci_mapping;
512 
513 		return (0);
514 	}
515 
516 	/* Initialize the hw data structure */
517 	status = ice_init_hw(hw);
518 	if (status) {
519 		if (status == ICE_ERR_FW_API_VER) {
520 			/* Enter recovery mode, so that the driver remains
521 			 * loaded. This way, if the system administrator
522 			 * cannot update the driver, they may still attempt to
523 			 * downgrade the NVM.
524 			 */
525 			err = ice_attach_pre_recovery_mode(sc);
526 			if (err)
527 				goto free_pci_mapping;
528 
529 			return (0);
530 		} else {
531 			err = EIO;
532 			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
533 				      ice_status_str(status),
534 				      ice_aq_str(hw->adminq.sq_last_status));
535 		}
536 		goto free_pci_mapping;
537 	}
538 
539 	ice_init_device_features(sc);
540 
541 	/* Keep flag set by default */
542 	ice_set_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
543 
544 	/* Notify firmware of the device driver version */
545 	err = ice_send_version(sc);
546 	if (err)
547 		goto deinit_hw;
548 
549 	/*
550 	 * Success indicates a change was made that requires a reinitialization
551 	 * of the hardware
552 	 */
553 	err = ice_load_pkg_file(sc);
554 	if (err == ICE_SUCCESS) {
555 		ice_deinit_hw(hw);
556 		goto reinit_hw;
557 	}
558 
559 	err = ice_init_link_events(sc);
560 	if (err) {
561 		device_printf(dev, "ice_init_link_events failed: %s\n",
562 			      ice_err_str(err));
563 		goto deinit_hw;
564 	}
565 
566 	/* Initialize VLAN mode in FW; if dual VLAN mode is supported by the package
567 	 * and firmware, this will force them to use single VLAN mode.
568 	 */
569 	status = ice_set_vlan_mode(hw);
570 	if (status) {
571 		err = EIO;
572 		device_printf(dev, "Unable to initialize VLAN mode, err %s aq_err %s\n",
573 			      ice_status_str(status),
574 			      ice_aq_str(hw->adminq.sq_last_status));
575 		goto deinit_hw;
576 	}
577 
578 	ice_print_nvm_version(sc);
579 
580 	/* Setup the MAC address */
581 	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);
582 
583 	/* Setup the iflib softc context structure */
584 	ice_setup_scctx(sc);
585 
586 	/* Initialize the Tx queue manager */
587 	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
588 	if (err) {
589 		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
590 			      ice_err_str(err));
591 		goto deinit_hw;
592 	}
593 
594 	/* Initialize the Rx queue manager */
595 	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
596 	if (err) {
597 		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
598 			      ice_err_str(err));
599 		goto free_tx_qmgr;
600 	}
601 
602 	/* Initialize the PF device interrupt resource manager */
603 	err = ice_alloc_intr_tracking(sc);
604 	if (err)
605 		/* Errors are already printed */
606 		goto free_rx_qmgr;
607 
608 	/* Determine maximum number of VSIs we'll prepare for */
609 	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
610 				    hw->func_caps.guar_num_vsi);
611 
612 	if (!sc->num_available_vsi) {
613 		err = EIO;
614 		device_printf(dev, "No VSIs allocated to host\n");
615 		goto free_intr_tracking;
616 	}
617 
618 	/* Allocate storage for the VSI pointers */
619 	sc->all_vsi = (struct ice_vsi **)
620 		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
621 		       M_ICE, M_WAITOK | M_ZERO);
622 	if (!sc->all_vsi) {
623 		err = ENOMEM;
624 		device_printf(dev, "Unable to allocate VSI array\n");
625 		goto free_intr_tracking;
626 	}
627 
628 	/*
629 	 * Prepare the statically allocated primary PF VSI in the softc
630 	 * structure. Other VSIs will be dynamically allocated as needed.
631 	 */
632 	ice_setup_pf_vsi(sc);
633 
634 	err = ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
635 	    scctx->isc_nrxqsets_max);
636 	if (err) {
637 		device_printf(dev, "Unable to allocate VSI Queue maps\n");
638 		goto free_main_vsi;
639 	}
640 
641 	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
642 	err = ice_allocate_msix(sc);
643 	if (err)
644 		goto free_main_vsi;
645 
646 	return 0;
647 
648 free_main_vsi:
649 	/* ice_release_vsi will free the queue maps if they were allocated */
650 	ice_release_vsi(&sc->pf_vsi);
651 	free(sc->all_vsi, M_ICE);
652 	sc->all_vsi = NULL;
653 free_intr_tracking:
654 	ice_free_intr_tracking(sc);
655 free_rx_qmgr:
656 	ice_resmgr_destroy(&sc->rx_qmgr);
657 free_tx_qmgr:
658 	ice_resmgr_destroy(&sc->tx_qmgr);
659 deinit_hw:
660 	ice_deinit_hw(hw);
661 free_pci_mapping:
662 	ice_free_pci_mapping(sc);
663 destroy_admin_timer:
664 	mtx_lock(&sc->admin_mtx);
665 	callout_stop(&sc->admin_timer);
666 	mtx_unlock(&sc->admin_mtx);
667 	mtx_destroy(&sc->admin_mtx);
668 	return err;
669 } /* ice_if_attach_pre */
670 
671 /**
672  * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
673  * @sc: the device private softc
674  *
675  * Loads the device driver in limited Firmware Recovery mode, intended to
676  * allow users to update the firmware to attempt to recover the device.
677  *
678  * @remark We may enter recovery mode in case either (a) the firmware is
679  * detected to be in an invalid state and must be re-programmed, or (b) the
680  * driver detects that the loaded firmware has a non-compatible API version
681  * that the driver cannot operate with.
682  */
683 static int
684 ice_attach_pre_recovery_mode(struct ice_softc *sc)
685 {
686 	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);
687 
688 	/* Setup the iflib softc context */
689 	ice_setup_scctx(sc);
690 
691 	/* Setup the PF VSI back pointer */
692 	sc->pf_vsi.sc = sc;
693 
694 	/*
695 	 * We still need to allocate MSI-X vectors since we need one vector to
696 	 * run the administrative admin interrupt
697 	 */
698 	return ice_allocate_msix(sc);
699 }
700 
701 /**
702  * ice_update_link_status - notify OS of link state change
703  * @sc: device private softc structure
704  * @update_media: true if we should update media even if link didn't change
705  *
706  * Called to notify iflib core of link status changes. Should be called once
707  * during attach_post, and whenever link status changes during runtime.
708  *
709  * This call only updates the currently supported media types if the link
710  * status changed, or if update_media is set to true.
711  */
712 static void
713 ice_update_link_status(struct ice_softc *sc, bool update_media)
714 {
715 	struct ice_hw *hw = &sc->hw;
716 	enum ice_status status;
717 
718 	/* Never report link up when in recovery mode */
719 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
720 		return;
721 
722 	/* Report link status to iflib only once each time it changes */
723 	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
724 		if (sc->link_up) { /* link is up */
725 			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);
726 
727 			if (!(hw->port_info->phy.link_info_old.link_info & ICE_AQ_LINK_UP))
728 				ice_set_default_local_lldp_mib(sc);
729 
730 			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
731 			ice_rdma_link_change(sc, LINK_STATE_UP, baudrate);
732 
733 			ice_link_up_msg(sc);
734 		} else { /* link is down */
735 			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
736 			ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
737 		}
738 		update_media = true;
739 	}
740 
741 	/* Update the supported media types */
742 	if (update_media && !ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
743 		status = ice_add_media_types(sc, sc->media);
744 		if (status)
745 			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
746 				      ice_status_str(status),
747 				      ice_aq_str(hw->adminq.sq_last_status));
748 	}
749 }
750 
751 /**
752  * ice_if_attach_post - Late device attach logic
753  * @ctx: the iflib context structure
754  *
755  * Called by iflib to finish up attaching the device. Performs any attach
756  * logic which must wait until after the Tx and Rx queues have been
757  * allocated.
758  */
759 static int
760 ice_if_attach_post(if_ctx_t ctx)
761 {
762 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
763 	if_t ifp = iflib_get_ifp(ctx);
764 	enum ice_status status;
765 	int err;
766 
767 	ASSERT_CTX_LOCKED(sc);
768 
769 	/* We don't yet support loading if MSI-X is not supported */
770 	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
771 		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
772 		return (ENOTSUP);
773 	}
774 
775 	/* The ifnet structure hasn't yet been initialized when the attach_pre
776 	 * handler is called, so wait until attach_post to setup the
777 	 * isc_max_frame_size.
778 	 */
779 
780 	sc->ifp = ifp;
781 	sc->scctx->isc_max_frame_size = if_getmtu(ifp) +
782 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
783 
784 	/*
785 	 * If we are in recovery mode, only perform a limited subset of
786 	 * initialization to support NVM recovery.
787 	 */
788 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
789 		ice_attach_post_recovery_mode(sc);
790 		return (0);
791 	}
792 
793 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
794 
795 	err = ice_initialize_vsi(&sc->pf_vsi);
796 	if (err) {
797 		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
798 			      ice_err_str(err));
799 		return err;
800 	}
801 
802 	/* Enable FW health event reporting */
803 	ice_init_health_events(sc);
804 
805 	/* Configure the main PF VSI for RSS */
806 	err = ice_config_rss(&sc->pf_vsi);
807 	if (err) {
808 		device_printf(sc->dev,
809 			      "Unable to configure RSS for the main VSI, err %s\n",
810 			      ice_err_str(err));
811 		return err;
812 	}
813 
814 	/* Configure switch to drop transmitted LLDP and PAUSE frames */
815 	err = ice_cfg_pf_ethertype_filters(sc);
816 	if (err)
817 		return err;
818 
819 	ice_get_and_print_bus_info(sc);
820 
821 	ice_set_link_management_mode(sc);
822 
823 	ice_init_saved_phy_cfg(sc);
824 
825 	ice_cfg_pba_num(sc);
826 
827 	/* Set a default value for PFC mode on attach since the FW state is unknown
828 	 * before sysctl tunables are executed and it can't be queried. This fixes an
829 	 * issue when loading the driver with the FW LLDP agent enabled but the FW
830 	 * was previously in DSCP PFC mode.
831 	 */
832 	status = ice_aq_set_pfc_mode(&sc->hw, ICE_AQC_PFC_VLAN_BASED_PFC, NULL);
833 	if (status != ICE_SUCCESS)
834 		device_printf(sc->dev, "Setting pfc mode failed, status %s\n", ice_status_str(status));
835 
836 	ice_add_device_sysctls(sc);
837 
838 	/* Get DCBX/LLDP state and start DCBX agent */
839 	ice_init_dcb_setup(sc);
840 
841 	/* Setup link configuration parameters */
842 	ice_init_link_configuration(sc);
843 	ice_update_link_status(sc, true);
844 
845 	/* Configure interrupt causes for the administrative interrupt */
846 	ice_configure_misc_interrupts(sc);
847 
848 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
849 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
850 
851 	err = ice_rdma_pf_attach(sc);
852 	if (err)
853 		return (err);
854 
855 	/* Start the admin timer */
856 	mtx_lock(&sc->admin_mtx);
857 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
858 	mtx_unlock(&sc->admin_mtx);
859 
860 	if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
861 		 !ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
862 		ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
863 
864 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
865 
866 	return 0;
867 } /* ice_if_attach_post */
868 
869 /**
870  * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
871  * @sc: the device private softc
872  *
873  * Performs minimal work to prepare the driver to recover an NVM in case the
874  * firmware is in recovery mode.
875  */
876 static void
877 ice_attach_post_recovery_mode(struct ice_softc *sc)
878 {
879 	/* Configure interrupt causes for the administrative interrupt */
880 	ice_configure_misc_interrupts(sc);
881 
882 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
883 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
884 
885 	/* Start the admin timer */
886 	mtx_lock(&sc->admin_mtx);
887 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
888 	mtx_unlock(&sc->admin_mtx);
889 
890 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
891 }
892 
893 /**
894  * ice_free_irqvs - Free IRQ vector memory
895  * @sc: the device private softc structure
896  *
897  * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
898  */
899 static void
900 ice_free_irqvs(struct ice_softc *sc)
901 {
902 	struct ice_vsi *vsi = &sc->pf_vsi;
903 	if_ctx_t ctx = sc->ctx;
904 	int i;
905 
906 	/* If the irqvs array is NULL, then there are no vectors to free */
907 	if (sc->irqvs == NULL)
908 		return;
909 
910 	/* Free the IRQ vectors */
911 	for (i = 0; i < sc->num_irq_vectors; i++)
912 		iflib_irq_free(ctx, &sc->irqvs[i].irq);
913 
914 	/* Clear the irqv pointers */
915 	for (i = 0; i < vsi->num_rx_queues; i++)
916 		vsi->rx_queues[i].irqv = NULL;
917 
918 	for (i = 0; i < vsi->num_tx_queues; i++)
919 		vsi->tx_queues[i].irqv = NULL;
920 
921 	/* Release the vector array memory */
922 	free(sc->irqvs, M_ICE);
923 	sc->irqvs = NULL;
924 	sc->num_irq_vectors = 0;
925 }
926 
927 /**
928  * ice_if_detach - Device driver detach logic
929  * @ctx: iflib context structure
930  *
931  * Perform device shutdown logic to detach the device driver.
932  *
933  * Note that there is no guarantee of the ordering of ice_if_queues_free() and
934  * ice_if_detach(). It is possible for the functions to be called in either
935  * order, and they must not assume to have a strict ordering.
936  */
937 static int
938 ice_if_detach(if_ctx_t ctx)
939 {
940 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
941 	struct ice_vsi *vsi = &sc->pf_vsi;
942 	enum ice_status status;
943 	int i;
944 
945 	ASSERT_CTX_LOCKED(sc);
946 
947 	/* Indicate that we're detaching */
948 	ice_set_state(&sc->state, ICE_STATE_DETACHING);
949 
950 	/* Stop the admin timer */
951 	mtx_lock(&sc->admin_mtx);
952 	callout_stop(&sc->admin_timer);
953 	mtx_unlock(&sc->admin_mtx);
954 	mtx_destroy(&sc->admin_mtx);
955 
956 	/* Remove additional interfaces if they exist */
957 	if (sc->mirr_if)
958 		ice_destroy_mirror_interface(sc);
959 	ice_rdma_pf_detach(sc);
960 
961 	/* Free allocated media types */
962 	ifmedia_removeall(sc->media);
963 
964 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
965 	 * pointers. Note, the calls here and those in ice_if_queues_free()
966 	 * are *BOTH* necessary, as we cannot guarantee which path will be
967 	 * run first
968 	 */
969 	ice_vsi_del_txqs_ctx(vsi);
970 	ice_vsi_del_rxqs_ctx(vsi);
971 
972 	/* Release MSI-X resources */
973 	ice_free_irqvs(sc);
974 
975 	for (i = 0; i < sc->num_available_vsi; i++) {
976 		if (sc->all_vsi[i])
977 			ice_release_vsi(sc->all_vsi[i]);
978 	}
979 
980 	if (sc->all_vsi) {
981 		free(sc->all_vsi, M_ICE);
982 		sc->all_vsi = NULL;
983 	}
984 
985 	/* Release MSI-X memory */
986 	pci_release_msi(sc->dev);
987 
988 	if (sc->msix_table != NULL) {
989 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
990 				     rman_get_rid(sc->msix_table),
991 				     sc->msix_table);
992 		sc->msix_table = NULL;
993 	}
994 
995 	ice_free_intr_tracking(sc);
996 
997 	/* Destroy the queue managers */
998 	ice_resmgr_destroy(&sc->tx_qmgr);
999 	ice_resmgr_destroy(&sc->rx_qmgr);
1000 
1001 	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1002 		ice_deinit_hw(&sc->hw);
1003 
1004 	IFLIB_CTX_UNLOCK(sc);
1005 	status = ice_reset(&sc->hw, ICE_RESET_PFR);
1006 	IFLIB_CTX_LOCK(sc);
1007 	if (status) {
1008 		device_printf(sc->dev, "device PF reset failed, err %s\n",
1009 			      ice_status_str(status));
1010 	}
1011 
1012 	ice_free_pci_mapping(sc);
1013 
1014 	return 0;
1015 } /* ice_if_detach */
1016 
1017 /**
1018  * ice_if_tx_queues_alloc - Allocate Tx queue memory
1019  * @ctx: iflib context structure
1020  * @vaddrs: virtual addresses for the queue memory
1021  * @paddrs: physical addresses for the queue memory
1022  * @ntxqs: the number of Tx queues per set (should always be 1)
1023  * @ntxqsets: the number of Tx queue sets to allocate
1024  *
1025  * Called by iflib to allocate Tx queues for the device. Allocates driver
1026  * memory to track each queue, the status arrays used for descriptor
1027  * status reporting, and Tx queue sysctls.
1028  */
1029 static int
1030 ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1031 		       int __invariant_only ntxqs, int ntxqsets)
1032 {
1033 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1034 	struct ice_vsi *vsi = &sc->pf_vsi;
1035 	struct ice_tx_queue *txq;
1036 	int err, i, j;
1037 
1038 	MPASS(ntxqs == 1);
1039 	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
1040 	ASSERT_CTX_LOCKED(sc);
1041 
1042 	/* Do not bother allocating queues if we're in recovery mode */
1043 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1044 		return (0);
1045 
1046 	/* Allocate queue structure memory */
1047 	if (!(vsi->tx_queues =
1048 	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1049 		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
1050 		return (ENOMEM);
1051 	}
1052 
1053 	/* Allocate report status arrays */
1054 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1055 		if (!(txq->tx_rsq =
1056 		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
1057 			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
1058 			err = ENOMEM;
1059 			goto free_tx_queues;
1060 		}
1061 		/* Initialize report status array */
1062 		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
1063 			txq->tx_rsq[j] = QIDX_INVALID;
1064 	}
1065 
1066 	/* Assign queues from PF space to the main VSI */
1067 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
1068 	if (err) {
1069 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1070 			      ice_err_str(err));
1071 		goto free_tx_queues;
1072 	}
1073 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1074 
1075 	/* Add Tx queue sysctls context */
1076 	ice_vsi_add_txqs_ctx(vsi);
1077 
1078 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1079 		/* q_handle == me when only one TC */
1080 		txq->me = txq->q_handle = i;
1081 		txq->vsi = vsi;
1082 
1083 		/* store the queue size for easier access */
1084 		txq->desc_count = sc->scctx->isc_ntxd[0];
1085 
1086 		/* get the virtual and physical address of the hardware queues */
1087 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
1088 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
1089 		txq->tx_paddr = paddrs[i];
1090 
1091 		ice_add_txq_sysctls(txq);
1092 	}
1093 
1094 	vsi->num_tx_queues = ntxqsets;
1095 
1096 	return (0);
1097 
1098 free_tx_queues:
1099 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1100 		if (txq->tx_rsq != NULL) {
1101 			free(txq->tx_rsq, M_ICE);
1102 			txq->tx_rsq = NULL;
1103 		}
1104 	}
1105 	free(vsi->tx_queues, M_ICE);
1106 	vsi->tx_queues = NULL;
1107 	return err;
1108 }
1109 
1110 /**
1111  * ice_if_rx_queues_alloc - Allocate Rx queue memory
1112  * @ctx: iflib context structure
1113  * @vaddrs: virtual addresses for the queue memory
1114  * @paddrs: physical addresses for the queue memory
1115  * @nrxqs: number of Rx queues per set (should always be 1)
1116  * @nrxqsets: number of Rx queue sets to allocate
1117  *
1118  * Called by iflib to allocate Rx queues for the device. Allocates driver
1119  * memory to track each queue, as well as sets up the Rx queue sysctls.
1120  */
1121 static int
1122 ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1123 		       int __invariant_only nrxqs, int nrxqsets)
1124 {
1125 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1126 	struct ice_vsi *vsi = &sc->pf_vsi;
1127 	struct ice_rx_queue *rxq;
1128 	int err, i;
1129 
1130 	MPASS(nrxqs == 1);
1131 	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
1132 	ASSERT_CTX_LOCKED(sc);
1133 
1134 	/* Do not bother allocating queues if we're in recovery mode */
1135 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1136 		return (0);
1137 
1138 	/* Allocate queue structure memory */
1139 	if (!(vsi->rx_queues =
1140 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1141 		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
1142 		return (ENOMEM);
1143 	}
1144 
1145 	/* Assign queues from PF space to the main VSI */
1146 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
1147 	if (err) {
1148 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1149 			      ice_err_str(err));
1150 		goto free_rx_queues;
1151 	}
1152 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1153 
1154 	/* Add Rx queue sysctls context */
1155 	ice_vsi_add_rxqs_ctx(vsi);
1156 
1157 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
1158 		rxq->me = i;
1159 		rxq->vsi = vsi;
1160 
1161 		/* store the queue size for easier access */
1162 		rxq->desc_count = sc->scctx->isc_nrxd[0];
1163 
1164 		/* get the virtual and physical address of the hardware queues */
1165 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
1166 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
1167 		rxq->rx_paddr = paddrs[i];
1168 
1169 		ice_add_rxq_sysctls(rxq);
1170 	}
1171 
1172 	vsi->num_rx_queues = nrxqsets;
1173 
1174 	return (0);
1175 
1176 free_rx_queues:
1177 	free(vsi->rx_queues, M_ICE);
1178 	vsi->rx_queues = NULL;
1179 	return err;
1180 }
1181 
1182 /**
1183  * ice_if_queues_free - Free queue memory
1184  * @ctx: the iflib context structure
1185  *
1186  * Free queue memory allocated by ice_if_tx_queues_alloc() and
1187  * ice_if_rx_queues_alloc().
1188  *
1189  * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
1190  * called in the same order. It's possible for ice_if_queues_free() to be
1191  * called prior to ice_if_detach(), and vice versa.
1192  *
1193  * For this reason, the main VSI is a static member of the ice_softc, which is
1194  * not free'd until after iflib finishes calling both of these functions.
1195  *
1196  * Thus, care must be taken in how we manage the memory being freed by this
1197  * function, and in what tasks it can and must perform.
1198  */
1199 static void
1200 ice_if_queues_free(if_ctx_t ctx)
1201 {
1202 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1203 	struct ice_vsi *vsi = &sc->pf_vsi;
1204 	struct ice_tx_queue *txq;
1205 	int i;
1206 
1207 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
1208 	 * pointers. Note, the calls here and those in ice_if_detach()
1209 	 * are *BOTH* necessary, as we cannot guarantee which path will be
1210 	 * run first
1211 	 */
1212 	ice_vsi_del_txqs_ctx(vsi);
1213 	ice_vsi_del_rxqs_ctx(vsi);
1214 
1215 	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
1216 	ice_free_irqvs(sc);
1217 
1218 	if (vsi->tx_queues != NULL) {
1219 		/* free the tx_rsq arrays */
1220 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1221 			if (txq->tx_rsq != NULL) {
1222 				free(txq->tx_rsq, M_ICE);
1223 				txq->tx_rsq = NULL;
1224 			}
1225 		}
1226 		free(vsi->tx_queues, M_ICE);
1227 		vsi->tx_queues = NULL;
1228 		vsi->num_tx_queues = 0;
1229 	}
1230 	if (vsi->rx_queues != NULL) {
1231 		free(vsi->rx_queues, M_ICE);
1232 		vsi->rx_queues = NULL;
1233 		vsi->num_rx_queues = 0;
1234 	}
1235 }
1236 
1237 /**
1238  * ice_msix_que - Fast interrupt handler for MSI-X receive queues
1239  * @arg: The Rx queue memory
1240  *
1241  * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
1242  * an MSI-X interrupt for a given queue is triggered. Currently this just asks
1243  * iflib to schedule the main Rx thread.
1244  */
1245 static int
1246 ice_msix_que(void *arg)
1247 {
1248 	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;
1249 
1250 	/* TODO: dynamic ITR algorithm?? */
1251 
1252 	return (FILTER_SCHEDULE_THREAD);
1253 }
1254 
1255 /**
1256  * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
1257  * @arg: pointer to device softc memory
1258  *
1259  * Called by iflib when an administrative interrupt occurs. Should perform any
1260  * fast logic for handling the interrupt cause, and then indicate whether the
1261  * admin task needs to be queued.
1262  */
1263 static int
1264 ice_msix_admin(void *arg)
1265 {
1266 	struct ice_softc *sc = (struct ice_softc *)arg;
1267 	struct ice_hw *hw = &sc->hw;
1268 	device_t dev = sc->dev;
1269 	u32 oicr;
1270 
1271 	/* There is no safe way to modify the enabled miscellaneous causes of
1272 	 * the OICR vector at runtime, as doing so would be prone to race
1273 	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
1274 	 * causes and allow future interrupts to occur. The admin interrupt
1275 	 * vector will not be re-enabled until after we exit this function,
1276 	 * but any delayed tasks must be resilient against possible "late
1277 	 * arrival" interrupts that occur while we're already handling the
1278 	 * task. This is done by using state bits and serializing these
1279 	 * delayed tasks via the admin status task function.
1280 	 */
1281 	oicr = rd32(hw, PFINT_OICR);
1282 
1283 	/* Processing multiple controlq interrupts on a single vector does not
1284 	 * provide an indication of which controlq triggered the interrupt.
1285 	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
1286 	 * registers. However, the INTEVENT bit is not guaranteed to be set as
1287 	 * it gets automatically cleared when the hardware acknowledges the
1288 	 * interrupt.
1289 	 *
1290 	 * This means we don't really have a good indication of whether or
1291 	 * which controlq triggered this interrupt. We'll just notify the
1292 	 * admin task that it should check all the controlqs.
1293 	 */
1294 	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
1295 
1296 	if (oicr & PFINT_OICR_VFLR_M) {
1297 		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
1298 	}
1299 
1300 	if (oicr & PFINT_OICR_MAL_DETECT_M) {
1301 		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
1302 	}
1303 
1304 	if (oicr & PFINT_OICR_GRST_M) {
1305 		u32 reset;
1306 
1307 		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
1308 			GLGEN_RSTAT_RESET_TYPE_S;
1309 
1310 		if (reset == ICE_RESET_CORER)
1311 			sc->soft_stats.corer_count++;
1312 		else if (reset == ICE_RESET_GLOBR)
1313 			sc->soft_stats.globr_count++;
1314 		else
1315 			sc->soft_stats.empr_count++;
1316 
1317 		/* There are a couple of bits at play for handling resets.
1318 		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
1319 		 * indicate that the driver has received an OICR with a reset
1320 		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
1321 		 * happen. Second, we set hw->reset_ongoing to indicate that
1322 		 * the hardware is in reset. We will set this back to false as
1323 		 * soon as the driver has determined that the hardware is out
1324 		 * of reset.
1325 		 *
1326 		 * If the driver wishes to trigger a request, it can set one of
1327 		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
1328 		 * correct type of reset.
1329 		 */
1330 		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) {
1331 			hw->reset_ongoing = true;
1332 			/*
1333 			 * During the NVM update process, there is a driver reset and link
1334 			 * goes down and then up. The below if-statement prevents a second
1335 			 * link flap from occurring in ice_if_init().
1336 			 */
1337 			if (if_getflags(sc->ifp) & IFF_UP)
1338 				ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
1339 		}
1340 	}
1341 
1342 	if (oicr & PFINT_OICR_ECC_ERR_M) {
1343 		device_printf(dev, "ECC Error detected!\n");
1344 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1345 	}
1346 
1347 	if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M)) {
1348 		if (oicr & PFINT_OICR_HMC_ERR_M)
1349 			/* Log the HMC errors */
1350 			ice_log_hmc_error(hw, dev);
1351 		ice_rdma_notify_pe_intr(sc, oicr);
1352 	}
1353 
1354 	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
1355 		device_printf(dev, "PCI Exception detected!\n");
1356 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1357 	}
1358 
1359 	return (FILTER_SCHEDULE_THREAD);
1360 }
1361 
1362 /**
1363  * ice_allocate_msix - Allocate MSI-X vectors for the interface
1364  * @sc: the device private softc
1365  *
1366  * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
1367  *
1368  * First, determine a suitable total number of vectors based on the number
1369  * of CPUs, RSS buckets, the administrative vector, and other demands such as
1370  * RDMA.
1371  *
1372  * Request the desired amount of vectors, and see how many we obtain. If we
1373  * don't obtain as many as desired, reduce the demands by lowering the number
1374  * of requested queues or reducing the demand from other features such as
1375  * RDMA.
1376  *
1377  * @remark This function is required because the driver sets the
1378  * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
1379  * manually.
1380  *
1381  * @remark This driver will only use MSI-X vectors. If this is not possible,
1382  * neither MSI or legacy interrupts will be tried.
1383  *
1384  * @remark if it exists, os_imgr is initialized here for keeping track of
1385  * the assignments of extra MSIX vectors.
1386  *
1387  * @post on success this function must set the following scctx parameters:
1388  * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
1389  *
1390  * @returns zero on success or an error code on failure.
1391  */
1392 static int
1393 ice_allocate_msix(struct ice_softc *sc)
1394 {
1395 	bool iflib_override_queue_count = false;
1396 	if_softc_ctx_t scctx = sc->scctx;
1397 	device_t dev = sc->dev;
1398 	cpuset_t cpus;
1399 	int bar, queues, vectors, requested;
1400 	int err = 0;
1401 	int rdma;
1402 
1403 	/* Allocate the MSI-X bar */
1404 	bar = scctx->isc_msix_bar;
1405 	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
1406 	if (!sc->msix_table) {
1407 		device_printf(dev, "Unable to map MSI-X table\n");
1408 		return (ENOMEM);
1409 	}
1410 
1411 	/* Check if the iflib queue count sysctls have been set */
1412 	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
1413 		iflib_override_queue_count = true;
1414 
1415 	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
1416 	if (err) {
1417 		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
1418 			      __func__, ice_err_str(err));
1419 		CPU_COPY(&all_cpus, &cpus);
1420 	}
1421 
1422 	/* Attempt to mimic behavior of iflib_msix_init */
1423 	if (iflib_override_queue_count) {
1424 		/*
1425 		 * If the override sysctls have been set, limit the queues to
1426 		 * the number of logical CPUs.
1427 		 */
1428 		queues = mp_ncpus;
1429 	} else {
1430 		/*
1431 		 * Otherwise, limit the queue count to the CPUs associated
1432 		 * with the NUMA node the device is associated with.
1433 		 */
1434 		queues = CPU_COUNT(&cpus);
1435 	}
1436 
1437 	/* Clamp to the number of RSS buckets */
1438 	queues = imin(queues, rss_getnumbuckets());
1439 
1440 	/*
1441 	 * Clamp the number of queue pairs to the minimum of the requested Tx
1442 	 * and Rx queues.
1443 	 */
1444 	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
1445 	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
1446 
1447 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) {
1448 		/*
1449 		 * Choose a number of RDMA vectors based on the number of CPUs
1450 		 * up to a maximum
1451 		 */
1452 		rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX);
1453 
1454 		/* Further limit by the user configurable tunable */
1455 		rdma = min(rdma, ice_rdma_max_msix);
1456 	} else {
1457 		rdma = 0;
1458 	}
1459 
1460 	/*
1461 	 * Determine the number of vectors to request. Note that we also need
1462 	 * to allocate one vector for administrative tasks.
1463 	 */
1464 	requested = rdma + queues + 1;
1465 	/* Add extra vectors requested by the user for later subinterface
1466 	 * creation.
1467 	 */
1468 	if_ctx_t ctx = sc->ctx;
1469 	u32 extra_vectors = iflib_get_extra_msix_vectors_sysctl(ctx);
1470 	requested += extra_vectors;
1471 
1472 	vectors = requested;
1473 	err = pci_alloc_msix(dev, &vectors);
1474 	if (err) {
1475 		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
1476 			      vectors, ice_err_str(err));
1477 		goto err_free_msix_table;
1478 	}
1479 
1480 	/* If we don't receive enough vectors, reduce demands */
1481 	if (vectors < requested) {
1482 		int diff = requested - vectors;
1483 
1484 		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
1485 			      requested, vectors);
1486 
1487 		diff += extra_vectors;
1488 		extra_vectors = 0;
1489 		/*
1490 		 * The OS didn't grant us the requested number of vectors.
1491 		 * Check to see if we can reduce demands by limiting the
1492 		 * number of vectors allocated to certain features.
1493 		 */
1494 
1495 		if (rdma >= diff) {
1496 			/* Reduce the number of RDMA vectors we reserve */
1497 			rdma -= diff;
1498 			diff = 0;
1499 		} else {
1500 			/* Disable RDMA and reduce the difference */
1501 			ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
1502 			diff -= rdma;
1503 			rdma = 0;
1504 		}
1505 
1506 		/*
1507 		 * If we still have a difference, we need to reduce the number
1508 		 * of queue pairs.
1509 		 *
1510 		 * However, we still need at least one vector for the admin
1511 		 * interrupt and one queue pair.
1512 		 */
1513 		if (queues <= diff) {
1514 			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
1515 			err = (ERANGE);
1516 			goto err_pci_release_msi;
1517 		}
1518 
1519 		queues -= diff;
1520 	}
1521 
1522 	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
1523 	if (rdma)
1524 		device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n",
1525 			      rdma);
1526 	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
1527 		      vectors);
1528 
1529 	/* Split resulting vectors back into requested splits */
1530 	scctx->isc_vectors = vectors;
1531 	scctx->isc_nrxqsets = queues;
1532 	scctx->isc_ntxqsets = queues;
1533 	scctx->isc_intr = IFLIB_INTR_MSIX;
1534 
1535 	sc->irdma_vectors = rdma;
1536 
1537 	/* Interrupt allocation tracking isn't required in recovery mode,
1538 	 * since neither RDMA nor VFs are enabled.
1539 	 */
1540 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1541 		return (0);
1542 
1543 	/* Keep track of which interrupt indices are being used for what */
1544 	sc->lan_vectors = vectors - rdma;
1545 	sc->lan_vectors -= extra_vectors;
1546 	err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->pf_imap, sc->lan_vectors);
1547 	if (err) {
1548 		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
1549 			      ice_err_str(err));
1550 		goto err_pci_release_msi;
1551 	}
1552 	err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->rdma_imap, rdma);
1553 	if (err) {
1554 		device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n",
1555 			      ice_err_str(err));
1556 		goto err_release_pf_imap;
1557 	}
1558 	sc->extra_vectors = extra_vectors;
1559 	/* Setup another resource manager to track the assignments of extra OS
1560 	 * vectors. These OS interrupt allocations don't need to be contiguous,
1561 	 * unlike the ones that come from the device.
1562 	 */
1563 	err = ice_resmgr_init(&sc->os_imgr, sc->extra_vectors);
1564 	if (err) {
1565 		device_printf(dev, "Unable to initialize OS extra interrupt manager: %s\n",
1566 			      ice_err_str(err));
1567 		ice_resmgr_release_map(&sc->dev_imgr, sc->rdma_imap,
1568 					    rdma);
1569 		goto err_release_pf_imap;
1570 	}
1571 	return (0);
1572 
1573 err_release_pf_imap:
1574 	ice_resmgr_release_map(&sc->dev_imgr, sc->pf_imap,
1575 				    sc->lan_vectors);
1576 err_pci_release_msi:
1577 	pci_release_msi(dev);
1578 err_free_msix_table:
1579 	if (sc->msix_table != NULL) {
1580 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
1581 				rman_get_rid(sc->msix_table),
1582 				sc->msix_table);
1583 		sc->msix_table = NULL;
1584 	}
1585 
1586 	return (err);
1587 }
1588 
1589 /**
1590  * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
1591  * @ctx: the iflib context structure
1592  * @msix: the number of vectors we were assigned
1593  *
1594  * Called by iflib to assign MSI-X vectors to queues. Currently requires that
1595  * we get at least the same number of vectors as we have queues, and that we
1596  * always have the same number of Tx and Rx queues.
1597  *
1598  * Tx queues use a softirq instead of using their own hardware interrupt.
1599  */
1600 static int
1601 ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
1602 {
1603 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1604 	struct ice_vsi *vsi = &sc->pf_vsi;
1605 	int err, i, vector;
1606 
1607 	ASSERT_CTX_LOCKED(sc);
1608 
1609 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
1610 		device_printf(sc->dev,
1611 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
1612 			      vsi->num_tx_queues, vsi->num_rx_queues);
1613 		return (EOPNOTSUPP);
1614 	}
1615 
1616 	if (msix < (vsi->num_rx_queues + 1)) {
1617 		device_printf(sc->dev,
1618 			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
1619 		return (EOPNOTSUPP);
1620 	}
1621 
1622 	/* Save the number of vectors for future use */
1623 	sc->num_irq_vectors = vsi->num_rx_queues + 1;
1624 
1625 	/* Allocate space to store the IRQ vector data */
1626 	if (!(sc->irqvs =
1627 	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
1628 					       M_ICE, M_NOWAIT))) {
1629 		device_printf(sc->dev,
1630 			      "Unable to allocate irqv memory\n");
1631 		return (ENOMEM);
1632 	}
1633 
1634 	/* Administrative interrupt events will use vector 0 */
1635 	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
1636 				      ice_msix_admin, sc, 0, "admin");
1637 	if (err) {
1638 		device_printf(sc->dev,
1639 			      "Failed to register Admin queue handler: %s\n",
1640 			      ice_err_str(err));
1641 		goto free_irqvs;
1642 	}
1643 	sc->irqvs[0].me = 0;
1644 
1645 	/* Do not allocate queue interrupts when in recovery mode */
1646 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1647 		return (0);
1648 
1649 	int rid;
1650 	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
1651 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1652 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1653 		char irq_name[16];
1654 
1655 		rid = vector + 1;
1656 
1657 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
1658 		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
1659 					      IFLIB_INTR_RXTX, ice_msix_que,
1660 					      rxq, rxq->me, irq_name);
1661 		if (err) {
1662 			device_printf(sc->dev,
1663 				      "Failed to allocate q int %d err: %s\n",
1664 				      i, ice_err_str(err));
1665 			vector--;
1666 			i--;
1667 			goto fail;
1668 		}
1669 		sc->irqvs[vector].me = vector;
1670 		rxq->irqv = &sc->irqvs[vector];
1671 
1672 		bzero(irq_name, sizeof(irq_name));
1673 
1674 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
1675 		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
1676 					    IFLIB_INTR_TX, txq,
1677 					    txq->me, irq_name);
1678 		txq->irqv = &sc->irqvs[vector];
1679 	}
1680 
1681 	/* For future interrupt assignments */
1682 	sc->last_rid = rid + sc->irdma_vectors;
1683 
1684 	return (0);
1685 fail:
1686 	for (; i >= 0; i--, vector--)
1687 		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
1688 	iflib_irq_free(ctx, &sc->irqvs[0].irq);
1689 free_irqvs:
1690 	free(sc->irqvs, M_ICE);
1691 	sc->irqvs = NULL;
1692 	return err;
1693 }
1694 
1695 /**
1696  * ice_if_mtu_set - Set the device MTU
1697  * @ctx: iflib context structure
1698  * @mtu: the MTU requested
1699  *
1700  * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
1701  *
1702  * @pre assumes the caller holds the iflib CTX lock
1703  */
1704 static int
1705 ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
1706 {
1707 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1708 
1709 	ASSERT_CTX_LOCKED(sc);
1710 
1711 	/* Do not support configuration when in recovery mode */
1712 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1713 		return (ENOSYS);
1714 
1715 	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
1716 		return (EINVAL);
1717 
1718 	sc->scctx->isc_max_frame_size = mtu +
1719 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1720 
1721 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
1722 
1723 	return (0);
1724 }
1725 
1726 /**
1727  * ice_if_intr_enable - Enable device interrupts
1728  * @ctx: iflib context structure
1729  *
1730  * Called by iflib to request enabling device interrupts.
1731  */
1732 static void
1733 ice_if_intr_enable(if_ctx_t ctx)
1734 {
1735 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1736 	struct ice_vsi *vsi = &sc->pf_vsi;
1737 	struct ice_hw *hw = &sc->hw;
1738 
1739 	ASSERT_CTX_LOCKED(sc);
1740 
1741 	/* Enable ITR 0 */
1742 	ice_enable_intr(hw, sc->irqvs[0].me);
1743 
1744 	/* Do not enable queue interrupts in recovery mode */
1745 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1746 		return;
1747 
1748 	/* Enable all queue interrupts */
1749 	for (int i = 0; i < vsi->num_rx_queues; i++)
1750 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
1751 }
1752 
1753 /**
1754  * ice_if_intr_disable - Disable device interrupts
1755  * @ctx: iflib context structure
1756  *
1757  * Called by iflib to request disabling device interrupts.
1758  */
1759 static void
1760 ice_if_intr_disable(if_ctx_t ctx)
1761 {
1762 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1763 	struct ice_hw *hw = &sc->hw;
1764 	unsigned int i;
1765 
1766 	ASSERT_CTX_LOCKED(sc);
1767 
1768 	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
1769 	 * assigned to queues. Instead of assuming that the interrupt
1770 	 * assignment in the rx_queues structure is valid, just disable all
1771 	 * possible interrupts
1772 	 *
1773 	 * Note that we choose not to disable ITR 0 because this handles the
1774 	 * AdminQ interrupts, and we want to keep processing these even when
1775 	 * the interface is offline.
1776 	 */
1777 	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1778 		ice_disable_intr(hw, i);
1779 }
1780 
1781 /**
1782  * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1783  * @ctx: iflib context structure
1784  * @rxqid: the Rx queue to enable
1785  *
1786  * Enable a specific Rx queue interrupt.
1787  *
1788  * This function is not protected by the iflib CTX lock.
1789  */
1790 static int
1791 ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1792 {
1793 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1794 	struct ice_vsi *vsi = &sc->pf_vsi;
1795 	struct ice_hw *hw = &sc->hw;
1796 
1797 	/* Do not enable queue interrupts in recovery mode */
1798 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1799 		return (ENOSYS);
1800 
1801 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1802 	return (0);
1803 }
1804 
1805 /**
1806  * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1807  * @ctx: iflib context structure
1808  * @txqid: the Tx queue to enable
1809  *
1810  * Enable a specific Tx queue interrupt.
1811  *
1812  * This function is not protected by the iflib CTX lock.
1813  */
1814 static int
1815 ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1816 {
1817 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1818 	struct ice_vsi *vsi = &sc->pf_vsi;
1819 	struct ice_hw *hw = &sc->hw;
1820 
1821 	/* Do not enable queue interrupts in recovery mode */
1822 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1823 		return (ENOSYS);
1824 
1825 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1826 	return (0);
1827 }
1828 
1829 /**
1830  * ice_set_default_promisc_mask - Set default config for promisc settings
1831  * @promisc_mask: bitmask to setup
1832  *
1833  * The ice_(set|clear)_vsi_promisc() function expects a mask of promiscuous
1834  * modes to operate on. The mask used in here is the default one for the
1835  * driver, where promiscuous is enabled/disabled for all types of
1836  * non-VLAN-tagged/VLAN 0 traffic.
1837  */
1838 static void
1839 ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask)
1840 {
1841 	ice_zero_bitmap(promisc_mask, ICE_PROMISC_MAX);
1842 	ice_set_bit(ICE_PROMISC_UCAST_TX, promisc_mask);
1843 	ice_set_bit(ICE_PROMISC_UCAST_RX, promisc_mask);
1844 	ice_set_bit(ICE_PROMISC_MCAST_TX, promisc_mask);
1845 	ice_set_bit(ICE_PROMISC_MCAST_RX, promisc_mask);
1846 }
1847 
1848 /**
1849  * ice_if_promisc_set - Set device promiscuous mode
1850  * @ctx: iflib context structure
1851  * @flags: promiscuous flags to configure
1852  *
1853  * Called by iflib to configure device promiscuous mode.
1854  *
1855  * @remark Calls to this function will always overwrite the previous setting
1856  */
1857 static int
1858 ice_if_promisc_set(if_ctx_t ctx, int flags)
1859 {
1860 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1861 	struct ice_hw *hw = &sc->hw;
1862 	device_t dev = sc->dev;
1863 	enum ice_status status;
1864 	bool promisc_enable = flags & IFF_PROMISC;
1865 	bool multi_enable = flags & IFF_ALLMULTI;
1866 	ice_declare_bitmap(promisc_mask, ICE_PROMISC_MAX);
1867 
1868 	/* Do not support configuration when in recovery mode */
1869 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1870 		return (ENOSYS);
1871 
1872 	ice_set_default_promisc_mask(promisc_mask);
1873 
1874 	if (multi_enable)
1875 		return (EOPNOTSUPP);
1876 
1877 	if (promisc_enable) {
1878 		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1879 					     promisc_mask, 0);
1880 		if (status && status != ICE_ERR_ALREADY_EXISTS) {
1881 			device_printf(dev,
1882 				      "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n",
1883 				      ice_status_str(status),
1884 				      ice_aq_str(hw->adminq.sq_last_status));
1885 			return (EIO);
1886 		}
1887 	} else {
1888 		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
1889 					       promisc_mask, 0);
1890 		if (status) {
1891 			device_printf(dev,
1892 				      "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n",
1893 				      ice_status_str(status),
1894 				      ice_aq_str(hw->adminq.sq_last_status));
1895 			return (EIO);
1896 		}
1897 	}
1898 
1899 	return (0);
1900 }
1901 
1902 /**
1903  * ice_if_media_change - Change device media
1904  * @ctx: device ctx structure
1905  *
1906  * Called by iflib when a media change is requested. This operation is not
1907  * supported by the hardware, so we just return an error code.
1908  */
1909 static int
1910 ice_if_media_change(if_ctx_t ctx)
1911 {
1912 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1913 
1914 	device_printf(sc->dev, "Media change is not supported.\n");
1915 	return (ENODEV);
1916 }
1917 
1918 /**
1919  * ice_if_media_status - Report current device media
1920  * @ctx: iflib context structure
1921  * @ifmr: ifmedia request structure to update
1922  *
1923  * Updates the provided ifmr with current device media status, including link
1924  * status and media type.
1925  */
1926 static void
1927 ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1928 {
1929 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1930 	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;
1931 
1932 	ifmr->ifm_status = IFM_AVALID;
1933 	ifmr->ifm_active = IFM_ETHER;
1934 
1935 	/* Never report link up or media types when in recovery mode */
1936 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1937 		return;
1938 
1939 	if (!sc->link_up)
1940 		return;
1941 
1942 	ifmr->ifm_status |= IFM_ACTIVE;
1943 	ifmr->ifm_active |= IFM_FDX;
1944 
1945 	if (li->phy_type_low)
1946 		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
1947 	else if (li->phy_type_high)
1948 		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
1949 	else
1950 		ifmr->ifm_active |= IFM_UNKNOWN;
1951 
1952 	/* Report flow control status as well */
1953 	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
1954 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1955 	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
1956 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1957 }
1958 
1959 /**
1960  * ice_init_tx_tracking - Initialize Tx queue software tracking values
1961  * @vsi: the VSI to initialize
1962  *
1963  * Initialize Tx queue software tracking values, including the Report Status
1964  * queue, and related software tracking values.
1965  */
1966 static void
1967 ice_init_tx_tracking(struct ice_vsi *vsi)
1968 {
1969 	struct ice_tx_queue *txq;
1970 	size_t j;
1971 	int i;
1972 
1973 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1974 
1975 		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;
1976 
1977 		/* Initialize the last processed descriptor to be the end of
1978 		 * the ring, rather than the start, so that we avoid an
1979 		 * off-by-one error in ice_ift_txd_credits_update for the
1980 		 * first packet.
1981 		 */
1982 		txq->tx_cidx_processed = txq->desc_count - 1;
1983 
1984 		for (j = 0; j < txq->desc_count; j++)
1985 			txq->tx_rsq[j] = QIDX_INVALID;
1986 	}
1987 }
1988 
1989 /**
1990  * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
1991  * @sc: the device softc
1992  *
1993  * Called to update the Rx queue mbuf_sz parameter for configuring the receive
1994  * buffer sizes when programming hardware.
1995  */
1996 static void
1997 ice_update_rx_mbuf_sz(struct ice_softc *sc)
1998 {
1999 	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
2000 	struct ice_vsi *vsi = &sc->pf_vsi;
2001 
2002 	MPASS(mbuf_sz <= UINT16_MAX);
2003 	vsi->mbuf_sz = mbuf_sz;
2004 }
2005 
2006 /**
2007  * ice_if_init - Initialize the device
2008  * @ctx: iflib ctx structure
2009  *
2010  * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
2011  * device filters and prepares the Tx and Rx engines.
2012  *
2013  * @pre assumes the caller holds the iflib CTX lock
2014  */
2015 static void
2016 ice_if_init(if_ctx_t ctx)
2017 {
2018 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
2019 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2020 	device_t dev = sc->dev;
2021 	int err;
2022 
2023 	ASSERT_CTX_LOCKED(sc);
2024 
2025 	/*
2026 	 * We've seen an issue with 11.3/12.1 where sideband routines are
2027 	 * called after detach is called.  This would call routines after
2028 	 * if_stop, causing issues with the teardown process.  This has
2029 	 * seemingly been fixed in STABLE snapshots, but it seems like a
2030 	 * good idea to have this guard here regardless.
2031 	 */
2032 	if (ice_driver_is_detaching(sc))
2033 		return;
2034 
2035 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2036 		return;
2037 
2038 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
2039 		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
2040 		return;
2041 	}
2042 
2043 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
2044 		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
2045 		return;
2046 	}
2047 
2048 	ice_update_rx_mbuf_sz(sc);
2049 
2050 	/* Update the MAC address... User might use a LAA */
2051 	err = ice_update_laa_mac(sc);
2052 	if (err) {
2053 		device_printf(dev,
2054 			      "LAA address change failed, err %s\n",
2055 			      ice_err_str(err));
2056 		return;
2057 	}
2058 
2059 	/* Initialize software Tx tracking values */
2060 	ice_init_tx_tracking(&sc->pf_vsi);
2061 
2062 	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
2063 	if (err) {
2064 		device_printf(dev,
2065 			      "Unable to configure the main VSI for Tx: %s\n",
2066 			      ice_err_str(err));
2067 		return;
2068 	}
2069 
2070 	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
2071 	if (err) {
2072 		device_printf(dev,
2073 			      "Unable to configure the main VSI for Rx: %s\n",
2074 			      ice_err_str(err));
2075 		goto err_cleanup_tx;
2076 	}
2077 
2078 	err = ice_control_all_rx_queues(&sc->pf_vsi, true);
2079 	if (err) {
2080 		device_printf(dev,
2081 			      "Unable to enable Rx rings for transmit: %s\n",
2082 			      ice_err_str(err));
2083 		goto err_cleanup_tx;
2084 	}
2085 
2086 	err = ice_cfg_pf_default_mac_filters(sc);
2087 	if (err) {
2088 		device_printf(dev,
2089 			      "Unable to configure default MAC filters: %s\n",
2090 			      ice_err_str(err));
2091 		goto err_stop_rx;
2092 	}
2093 
2094 	/* We use software interrupts for Tx, so we only program the hardware
2095 	 * interrupts for Rx.
2096 	 */
2097 	ice_configure_all_rxq_interrupts(&sc->pf_vsi);
2098 	ice_configure_rx_itr(&sc->pf_vsi);
2099 
2100 	/* Configure promiscuous mode */
2101 	ice_if_promisc_set(ctx, if_getflags(sc->ifp));
2102 
2103 	if (!ice_testandclear_state(&sc->state, ICE_STATE_FIRST_INIT_LINK))
2104 		if (!sc->link_up && ((if_getflags(sc->ifp) & IFF_UP) ||
2105 			 ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN)))
2106 			ice_set_link(sc, true);
2107 
2108 	ice_rdma_pf_init(sc);
2109 
2110 	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
2111 
2112 	if (sc->mirr_if && ice_testandclear_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
2113 		ice_clear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
2114 		iflib_request_reset(sc->mirr_if->subctx);
2115 		iflib_admin_intr_deferred(sc->mirr_if->subctx);
2116 	}
2117 
2118 	return;
2119 
2120 err_stop_rx:
2121 	ice_control_all_rx_queues(&sc->pf_vsi, false);
2122 err_cleanup_tx:
2123 	ice_vsi_disable_tx(&sc->pf_vsi);
2124 }
2125 
2126 /**
2127  * ice_poll_for_media_avail - Re-enable link if media is detected
2128  * @sc: device private structure
2129  *
2130  * Intended to be called from the driver's timer function, this function
2131  * sends the Get Link Status AQ command and re-enables HW link if the
2132  * command says that media is available.
2133  *
2134  * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing,
2135  * since media removal events are supposed to be sent to the driver through
2136  * a link status event.
2137  */
2138 static void
2139 ice_poll_for_media_avail(struct ice_softc *sc)
2140 {
2141 	struct ice_hw *hw = &sc->hw;
2142 	struct ice_port_info *pi = hw->port_info;
2143 
2144 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2145 		pi->phy.get_link_info = true;
2146 		ice_get_link_status(pi, &sc->link_up);
2147 
2148 		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2149 			enum ice_status status;
2150 
2151 			/* Re-enable link and re-apply user link settings */
2152 			if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) ||
2153 			    (if_getflags(sc->ifp) & IFF_UP)) {
2154 				ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
2155 
2156 				/* Update the OS about changes in media capability */
2157 				status = ice_add_media_types(sc, sc->media);
2158 				if (status)
2159 					device_printf(sc->dev,
2160 					    "Error adding device media types: %s aq_err %s\n",
2161 					    ice_status_str(status),
2162 					    ice_aq_str(hw->adminq.sq_last_status));
2163 			}
2164 
2165 			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
2166 		}
2167 	}
2168 }
2169 
2170 /**
2171  * ice_if_timer - called by iflib periodically
2172  * @ctx: iflib ctx structure
2173  * @qid: the queue this timer was called for
2174  *
2175  * This callback is triggered by iflib periodically. We use it to update the
2176  * hw statistics.
2177  *
2178  * @remark this function is not protected by the iflib CTX lock.
2179  */
2180 static void
2181 ice_if_timer(if_ctx_t ctx, uint16_t qid)
2182 {
2183 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2184 	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;
2185 
2186 	if (qid != 0)
2187 		return;
2188 
2189 	/* Do not attempt to update stats when in recovery mode */
2190 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2191 		return;
2192 
2193 	/* Update device statistics */
2194 	ice_update_pf_stats(sc);
2195 
2196 	/*
2197 	 * For proper watchdog management, the iflib stack needs to know if
2198 	 * we've been paused during the last interval. Check if the
2199 	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
2200 	 */
2201 	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
2202 		sc->scctx->isc_pause_frames = 1;
2203 
2204 	/* Update the primary VSI stats */
2205 	ice_update_vsi_hw_stats(&sc->pf_vsi);
2206 
2207 	/* Update mirror VSI stats */
2208 	if (sc->mirr_if && sc->mirr_if->if_attached)
2209 		ice_update_vsi_hw_stats(sc->mirr_if->vsi);
2210 }
2211 
2212 /**
2213  * ice_admin_timer - called periodically to trigger the admin task
2214  * @arg: callout(9) argument pointing to the device private softc structure
2215  *
2216  * Timer function used as part of a callout(9) timer that will periodically
2217  * trigger the admin task, even when the interface is down.
2218  *
2219  * @remark this function is not called by iflib and is not protected by the
2220  * iflib CTX lock.
2221  *
2222  * @remark because this is a callout function, it cannot sleep and should not
2223  * attempt taking the iflib CTX lock.
2224  */
2225 static void
2226 ice_admin_timer(void *arg)
2227 {
2228 	struct ice_softc *sc = (struct ice_softc *)arg;
2229 
2230 	/*
2231 	 * There is a point where callout routines are no longer
2232 	 * cancelable.  So there exists a window of time where the
2233 	 * driver enters detach() and tries to cancel the callout, but the
2234 	 * callout routine has passed the cancellation point.  The detach()
2235 	 * routine is unaware of this and tries to free resources that the
2236 	 * callout routine needs.  So we check for the detach state flag to
2237 	 * at least shrink the window of opportunity.
2238 	 */
2239 	if (ice_driver_is_detaching(sc))
2240 		return;
2241 
2242 	/* Fire off the admin task */
2243 	iflib_admin_intr_deferred(sc->ctx);
2244 
2245 	/* Reschedule the admin timer */
2246 	callout_schedule(&sc->admin_timer, hz/2);
2247 }
2248 
2249 /**
2250  * ice_transition_recovery_mode - Transition to recovery mode
2251  * @sc: the device private softc
2252  *
2253  * Called when the driver detects that the firmware has entered recovery mode
2254  * at run time.
2255  */
2256 static void
2257 ice_transition_recovery_mode(struct ice_softc *sc)
2258 {
2259 	struct ice_vsi *vsi = &sc->pf_vsi;
2260 	int i;
2261 
2262 	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
2263 
2264 	/* Tell the stack that the link has gone down */
2265 	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
2266 
2267 	/* Request that the device be re-initialized */
2268 	ice_request_stack_reinit(sc);
2269 
2270 	ice_rdma_pf_detach(sc);
2271 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2272 
2273 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2274 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2275 
2276 	ice_vsi_del_txqs_ctx(vsi);
2277 	ice_vsi_del_rxqs_ctx(vsi);
2278 
2279 	for (i = 0; i < sc->num_available_vsi; i++) {
2280 		if (sc->all_vsi[i])
2281 			ice_release_vsi(sc->all_vsi[i]);
2282 	}
2283 	sc->num_available_vsi = 0;
2284 
2285 	if (sc->all_vsi) {
2286 		free(sc->all_vsi, M_ICE);
2287 		sc->all_vsi = NULL;
2288 	}
2289 
2290 	/* Destroy the interrupt manager */
2291 	ice_resmgr_destroy(&sc->dev_imgr);
2292 	/* Destroy the queue managers */
2293 	ice_resmgr_destroy(&sc->tx_qmgr);
2294 	ice_resmgr_destroy(&sc->rx_qmgr);
2295 
2296 	ice_deinit_hw(&sc->hw);
2297 }
2298 
2299 /**
2300  * ice_transition_safe_mode - Transition to safe mode
2301  * @sc: the device private softc
2302  *
2303  * Called when the driver attempts to reload the DDP package during a device
2304  * reset, and the new download fails. If so, we must transition to safe mode
2305  * at run time.
2306  *
2307  * @remark although safe mode normally allocates only a single queue, we can't
2308  * change the number of queues dynamically when using iflib. Due to this, we
2309  * do not attempt to reduce the number of queues.
2310  */
2311 static void
2312 ice_transition_safe_mode(struct ice_softc *sc)
2313 {
2314 	/* Indicate that we are in Safe mode */
2315 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
2316 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
2317 
2318 	ice_rdma_pf_detach(sc);
2319 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2320 
2321 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2322 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2323 
2324 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2325 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
2326 }
2327 
2328 /**
2329  * ice_if_update_admin_status - update admin status
2330  * @ctx: iflib ctx structure
2331  *
2332  * Called by iflib to update the admin status. For our purposes, this means
2333  * check the adminq, and update the link status. It's ultimately triggered by
2334  * our admin interrupt, or by the ice_if_timer periodically.
2335  *
2336  * @pre assumes the caller holds the iflib CTX lock
2337  */
2338 static void
2339 ice_if_update_admin_status(if_ctx_t ctx)
2340 {
2341 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2342 	enum ice_fw_modes fw_mode;
2343 	bool reschedule = false;
2344 	u16 pending = 0;
2345 
2346 	ASSERT_CTX_LOCKED(sc);
2347 
2348 	/* Check if the firmware entered recovery mode at run time */
2349 	fw_mode = ice_get_fw_mode(&sc->hw);
2350 	if (fw_mode == ICE_FW_MODE_REC) {
2351 		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2352 			/* If we just entered recovery mode, log a warning to
2353 			 * the system administrator and deinit driver state
2354 			 * that is no longer functional.
2355 			 */
2356 			ice_transition_recovery_mode(sc);
2357 		}
2358 	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
2359 		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
2360 			/* Rollback mode isn't fatal, but we don't want to
2361 			 * repeatedly post a message about it.
2362 			 */
2363 			ice_print_rollback_msg(&sc->hw);
2364 		}
2365 	}
2366 
2367 	/* Handle global reset events */
2368 	ice_handle_reset_event(sc);
2369 
2370 	/* Handle PF reset requests */
2371 	ice_handle_pf_reset_request(sc);
2372 
2373 	/* Handle MDD events */
2374 	ice_handle_mdd_event(sc);
2375 
2376 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
2377 	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
2378 	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2379 		/*
2380 		 * If we know the control queues are disabled, skip processing
2381 		 * the control queues entirely.
2382 		 */
2383 		;
2384 	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
2385 		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
2386 		if (pending > 0)
2387 			reschedule = true;
2388 
2389 		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
2390 		if (pending > 0)
2391 			reschedule = true;
2392 	}
2393 
2394 	/* Poll for link up */
2395 	ice_poll_for_media_avail(sc);
2396 
2397 	/* Check and update link status */
2398 	ice_update_link_status(sc, false);
2399 
2400 	/*
2401 	 * If there are still messages to process, we need to reschedule
2402 	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
2403 	 * woken up at the next interrupt or timer event.
2404 	 */
2405 	if (reschedule) {
2406 		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
2407 		iflib_admin_intr_deferred(ctx);
2408 	} else {
2409 		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2410 	}
2411 }
2412 
2413 /**
2414  * ice_prepare_for_reset - Prepare device for an impending reset
2415  * @sc: The device private softc
2416  *
2417  * Prepare the driver for an impending reset, shutting down VSIs, clearing the
2418  * scheduler setup, and shutting down controlqs. Uses the
2419  * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
2420  * driver for reset or not.
2421  */
2422 static void
2423 ice_prepare_for_reset(struct ice_softc *sc)
2424 {
2425 	struct ice_hw *hw = &sc->hw;
2426 
2427 	/* If we're already prepared, there's nothing to do */
2428 	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
2429 		return;
2430 
2431 	log(LOG_INFO, "%s: preparing to reset device logic\n", if_name(sc->ifp));
2432 
2433 	/* In recovery mode, hardware is not initialized */
2434 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2435 		return;
2436 
2437 	/* inform the RDMA client */
2438 	ice_rdma_notify_reset(sc);
2439 	/* stop the RDMA client */
2440 	ice_rdma_pf_stop(sc);
2441 
2442 	/* Release the main PF VSI queue mappings */
2443 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2444 				    sc->pf_vsi.num_tx_queues);
2445 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2446 				    sc->pf_vsi.num_rx_queues);
2447 	if (sc->mirr_if) {
2448 		ice_resmgr_release_map(&sc->tx_qmgr, sc->mirr_if->vsi->tx_qmap,
2449 		    sc->mirr_if->num_irq_vectors);
2450 		ice_resmgr_release_map(&sc->rx_qmgr, sc->mirr_if->vsi->rx_qmap,
2451 		    sc->mirr_if->num_irq_vectors);
2452 	}
2453 
2454 	ice_clear_hw_tbls(hw);
2455 
2456 	if (hw->port_info)
2457 		ice_sched_cleanup_all(hw);
2458 
2459 	ice_shutdown_all_ctrlq(hw, false);
2460 }
2461 
2462 /**
2463  * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
2464  * @sc: the device softc pointer
2465  *
2466  * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
2467  * mapping after a reset occurred.
2468  */
2469 static int
2470 ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
2471 {
2472 	struct ice_vsi *vsi = &sc->pf_vsi;
2473 	struct ice_tx_queue *txq;
2474 	struct ice_rx_queue *rxq;
2475 	int err, i;
2476 
2477 	/* Re-assign Tx queues from PF space to the main VSI */
2478 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
2479 					    vsi->num_tx_queues);
2480 	if (err) {
2481 		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
2482 			      ice_err_str(err));
2483 		return (err);
2484 	}
2485 
2486 	/* Re-assign Rx queues from PF space to this VSI */
2487 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
2488 					    vsi->num_rx_queues);
2489 	if (err) {
2490 		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
2491 			      ice_err_str(err));
2492 		goto err_release_tx_queues;
2493 	}
2494 
2495 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
2496 
2497 	/* Re-assign Tx queue tail pointers */
2498 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
2499 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
2500 
2501 	/* Re-assign Rx queue tail pointers */
2502 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
2503 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
2504 
2505 	return (0);
2506 
2507 err_release_tx_queues:
2508 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2509 				   sc->pf_vsi.num_tx_queues);
2510 
2511 	return (err);
2512 }
2513 
/* Non-zero when the interface behind the iflib context is marked running */
#define CTX_ACTIVE(ctx) \
	(if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
2516 
2517 /**
2518  * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
2519  * @sc: The device private softc
2520  *
2521  * Handle a driver rebuild while in recovery mode. This will only rebuild the
2522  * limited functionality supported while in recovery mode.
2523  */
2524 static void
2525 ice_rebuild_recovery_mode(struct ice_softc *sc)
2526 {
2527 	device_t dev = sc->dev;
2528 
2529 	/* enable PCIe bus master */
2530 	pci_enable_busmaster(dev);
2531 
2532 	/* Configure interrupt causes for the administrative interrupt */
2533 	ice_configure_misc_interrupts(sc);
2534 
2535 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2536 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2537 
2538 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2539 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2540 
2541 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2542 
2543 	/* In order to completely restore device functionality, the iflib core
2544 	 * needs to be reset. We need to request an iflib reset. Additionally,
2545 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2546 	 * the iflib core, we also want re-run the admin task so that iflib
2547 	 * resets immediately instead of waiting for the next interrupt.
2548 	 */
2549 	ice_request_stack_reinit(sc);
2550 
2551 	return;
2552 }
2553 
2554 /**
2555  * ice_rebuild - Rebuild driver state post reset
2556  * @sc: The device private softc
2557  *
2558  * Restore driver state after a reset occurred. Restart the controlqs, setup
2559  * the hardware port, and re-enable the VSIs.
2560  */
2561 static void
2562 ice_rebuild(struct ice_softc *sc)
2563 {
2564 	struct ice_hw *hw = &sc->hw;
2565 	device_t dev = sc->dev;
2566 	enum ice_ddp_state pkg_state;
2567 	enum ice_status status;
2568 	int err;
2569 
2570 	sc->rebuild_ticks = ticks;
2571 
2572 	/* If we're rebuilding, then a reset has succeeded. */
2573 	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);
2574 
2575 	/*
2576 	 * If the firmware is in recovery mode, only restore the limited
2577 	 * functionality supported by recovery mode.
2578 	 */
2579 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2580 		ice_rebuild_recovery_mode(sc);
2581 		return;
2582 	}
2583 
2584 	/* enable PCIe bus master */
2585 	pci_enable_busmaster(dev);
2586 
2587 	status = ice_init_all_ctrlq(hw);
2588 	if (status) {
2589 		device_printf(dev, "failed to re-init controlqs, err %s\n",
2590 			      ice_status_str(status));
2591 		goto err_shutdown_ctrlq;
2592 	}
2593 
2594 	/* Query the allocated resources for Tx scheduler */
2595 	status = ice_sched_query_res_alloc(hw);
2596 	if (status) {
2597 		device_printf(dev,
2598 			      "Failed to query scheduler resources, err %s aq_err %s\n",
2599 			      ice_status_str(status),
2600 			      ice_aq_str(hw->adminq.sq_last_status));
2601 		goto err_shutdown_ctrlq;
2602 	}
2603 
2604 	/* Re-enable FW logging. Keep going even if this fails */
2605 	status = ice_fwlog_set(hw, &hw->fwlog_cfg);
2606 	if (!status) {
2607 		/*
2608 		 * We should have the most updated cached copy of the
2609 		 * configuration, regardless of whether we're rebuilding
2610 		 * or not.  So we'll simply check to see if logging was
2611 		 * enabled pre-rebuild.
2612 		 */
2613 		if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2614 			status = ice_fwlog_register(hw);
2615 			if (status)
2616 				device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n",
2617 				   ice_status_str(status),
2618 				   ice_aq_str(hw->adminq.sq_last_status));
2619 		}
2620 	} else
2621 		device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n",
2622 		   ice_status_str(status),
2623 		   ice_aq_str(hw->adminq.sq_last_status));
2624 
2625 	err = ice_send_version(sc);
2626 	if (err)
2627 		goto err_shutdown_ctrlq;
2628 
2629 	err = ice_init_link_events(sc);
2630 	if (err) {
2631 		device_printf(dev, "ice_init_link_events failed: %s\n",
2632 			      ice_err_str(err));
2633 		goto err_shutdown_ctrlq;
2634 	}
2635 
2636 	status = ice_clear_pf_cfg(hw);
2637 	if (status) {
2638 		device_printf(dev, "failed to clear PF configuration, err %s\n",
2639 			      ice_status_str(status));
2640 		goto err_shutdown_ctrlq;
2641 	}
2642 
2643 	ice_clean_all_vsi_rss_cfg(sc);
2644 
2645 	ice_clear_pxe_mode(hw);
2646 
2647 	status = ice_get_caps(hw);
2648 	if (status) {
2649 		device_printf(dev, "failed to get capabilities, err %s\n",
2650 			      ice_status_str(status));
2651 		goto err_shutdown_ctrlq;
2652 	}
2653 
2654 	status = ice_sched_init_port(hw->port_info);
2655 	if (status) {
2656 		device_printf(dev, "failed to initialize port, err %s\n",
2657 			      ice_status_str(status));
2658 		goto err_sched_cleanup;
2659 	}
2660 
2661 	/* If we previously loaded the package, it needs to be reloaded now */
2662 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
2663 		pkg_state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
2664 		if (!ice_is_init_pkg_successful(pkg_state)) {
2665 			ice_log_pkg_init(sc, pkg_state);
2666 			ice_transition_safe_mode(sc);
2667 		}
2668 	}
2669 
2670 	ice_reset_pf_stats(sc);
2671 
2672 	err = ice_rebuild_pf_vsi_qmap(sc);
2673 	if (err) {
2674 		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
2675 			      ice_err_str(err));
2676 		goto err_sched_cleanup;
2677 	}
2678 	err = ice_initialize_vsi(&sc->pf_vsi);
2679 	if (err) {
2680 		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
2681 			      ice_err_str(err));
2682 		goto err_release_queue_allocations;
2683 	}
2684 
2685 	/* Replay all VSI configuration */
2686 	err = ice_replay_all_vsi_cfg(sc);
2687 	if (err)
2688 		goto err_deinit_pf_vsi;
2689 
2690 	/* Re-enable FW health event reporting */
2691 	ice_init_health_events(sc);
2692 
2693 	/* Reconfigure the main PF VSI for RSS */
2694 	err = ice_config_rss(&sc->pf_vsi);
2695 	if (err) {
2696 		device_printf(sc->dev,
2697 			      "Unable to reconfigure RSS for the main VSI, err %s\n",
2698 			      ice_err_str(err));
2699 		goto err_deinit_pf_vsi;
2700 	}
2701 
2702 	if (hw->port_info->qos_cfg.is_sw_lldp)
2703 		ice_add_rx_lldp_filter(sc);
2704 
2705 	/* Refresh link status */
2706 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2707 	sc->hw.port_info->phy.get_link_info = true;
2708 	ice_get_link_status(sc->hw.port_info, &sc->link_up);
2709 	ice_update_link_status(sc, true);
2710 
2711 	/* RDMA interface will be restarted by the stack re-init */
2712 
2713 	/* Configure interrupt causes for the administrative interrupt */
2714 	ice_configure_misc_interrupts(sc);
2715 
2716 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2717 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2718 
2719 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2720 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2721 
2722 	/* Reconfigure the subinterface */
2723 	if (sc->mirr_if) {
2724 		err = ice_subif_rebuild(sc);
2725 		if (err)
2726 			goto err_deinit_pf_vsi;
2727 	}
2728 
2729 	log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname);
2730 
2731 	/* In order to completely restore device functionality, the iflib core
2732 	 * needs to be reset. We need to request an iflib reset. Additionally,
2733 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2734 	 * the iflib core, we also want re-run the admin task so that iflib
2735 	 * resets immediately instead of waiting for the next interrupt.
2736 	 * If LLDP is enabled we need to reconfig DCB to properly reinit all TC
2737 	 * queues, not only 0. It contains ice_request_stack_reinit as well.
2738 	 */
2739 	if (hw->port_info->qos_cfg.is_sw_lldp)
2740 		ice_request_stack_reinit(sc);
2741 	else
2742 		ice_do_dcb_reconfig(sc, false);
2743 
2744 	return;
2745 
2746 err_deinit_pf_vsi:
2747 	ice_deinit_vsi(&sc->pf_vsi);
2748 err_release_queue_allocations:
2749 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2750 				    sc->pf_vsi.num_tx_queues);
2751 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2752 				    sc->pf_vsi.num_rx_queues);
2753 err_sched_cleanup:
2754 	ice_sched_cleanup_all(hw);
2755 err_shutdown_ctrlq:
2756 	ice_shutdown_all_ctrlq(hw, false);
2757 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2758 	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2759 	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
2760 }
2761 
2762 /**
2763  * ice_handle_reset_event - Handle reset events triggered by OICR
2764  * @sc: The device private softc
2765  *
2766  * Handle reset events triggered by an OICR notification. This includes CORER,
2767  * GLOBR, and EMPR resets triggered by software on this or any other PF or by
2768  * firmware.
2769  *
2770  * @pre assumes the iflib context lock is held, and will unlock it while
2771  * waiting for the hardware to finish reset.
2772  */
2773 static void
2774 ice_handle_reset_event(struct ice_softc *sc)
2775 {
2776 	struct ice_hw *hw = &sc->hw;
2777 	enum ice_status status;
2778 	device_t dev = sc->dev;
2779 
2780 	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
2781 	 * trigger an OICR interrupt. Our OICR handler will determine when
2782 	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
2783 	 * appropriate.
2784 	 */
2785 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
2786 		return;
2787 
2788 	ice_prepare_for_reset(sc);
2789 
2790 	/*
2791 	 * Release the iflib context lock and wait for the device to finish
2792 	 * resetting.
2793 	 */
2794 	IFLIB_CTX_UNLOCK(sc);
2795 	status = ice_check_reset(hw);
2796 	IFLIB_CTX_LOCK(sc);
2797 	if (status) {
2798 		device_printf(dev, "Device never came out of reset, err %s\n",
2799 			      ice_status_str(status));
2800 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2801 		return;
2802 	}
2803 
2804 	/* We're done with the reset, so we can rebuild driver state */
2805 	sc->hw.reset_ongoing = false;
2806 	ice_rebuild(sc);
2807 
2808 	/* In the unlikely event that a PF reset request occurs at the same
2809 	 * time as a global reset, clear the request now. This avoids
2810 	 * resetting a second time right after we reset due to a global event.
2811 	 */
2812 	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2813 		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
2814 }
2815 
2816 /**
2817  * ice_handle_pf_reset_request - Initiate PF reset requested by software
2818  * @sc: The device private softc
2819  *
2820  * Initiate a PF reset requested by software. We handle this in the admin task
2821  * so that only one thread actually handles driver preparation and cleanup,
2822  * rather than having multiple threads possibly attempt to run this code
2823  * simultaneously.
2824  *
2825  * @pre assumes the iflib context lock is held and will unlock it while
2826  * waiting for the PF reset to complete.
2827  */
2828 static void
2829 ice_handle_pf_reset_request(struct ice_softc *sc)
2830 {
2831 	struct ice_hw *hw = &sc->hw;
2832 	enum ice_status status;
2833 
2834 	/* Check for PF reset requests */
2835 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2836 		return;
2837 
2838 	/* Make sure we're prepared for reset */
2839 	ice_prepare_for_reset(sc);
2840 
2841 	/*
2842 	 * Release the iflib context lock and wait for the device to finish
2843 	 * resetting.
2844 	 */
2845 	IFLIB_CTX_UNLOCK(sc);
2846 	status = ice_reset(hw, ICE_RESET_PFR);
2847 	IFLIB_CTX_LOCK(sc);
2848 	if (status) {
2849 		device_printf(sc->dev, "device PF reset failed, err %s\n",
2850 			      ice_status_str(status));
2851 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2852 		return;
2853 	}
2854 
2855 	sc->soft_stats.pfr_count++;
2856 	ice_rebuild(sc);
2857 }
2858 
2859 /**
2860  * ice_init_device_features - Init device driver features
2861  * @sc: driver softc structure
2862  *
2863  * @pre assumes that the function capabilities bits have been set up by
2864  * ice_init_hw().
2865  */
2866 static void
2867 ice_init_device_features(struct ice_softc *sc)
2868 {
2869 	struct ice_hw *hw = &sc->hw;
2870 
2871 	/* Set capabilities that all devices support */
2872 	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2873 	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
2874 	ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2875 	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
2876 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
2877 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
2878 	ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2879 	ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2880 	ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap);
2881 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_cap);
2882 	ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2883 
2884 	/* Disable features due to hardware limitations... */
2885 	if (!hw->func_caps.common_cap.rss_table_size)
2886 		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2887 	if (!hw->func_caps.common_cap.iwarp || !ice_enable_irdma)
2888 		ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2889 	if (!hw->func_caps.common_cap.dcb)
2890 		ice_clear_bit(ICE_FEATURE_DCB, sc->feat_cap);
2891 	/* Disable features due to firmware limitations... */
2892 	if (!ice_is_fw_health_report_supported(hw))
2893 		ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2894 	if (!ice_fwlog_supported(hw))
2895 		ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2896 	if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2897 		if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING))
2898 			ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en);
2899 		else
2900 			ice_fwlog_unregister(hw);
2901 	}
2902 
2903 	/* Disable capabilities not supported by the OS */
2904 	ice_disable_unsupported_features(sc->feat_cap);
2905 
2906 	/* RSS is always enabled for iflib */
2907 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
2908 		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
2909 
2910 	/* Disable features based on sysctl settings */
2911 	if (!ice_tx_balance_en)
2912 		ice_clear_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2913 
2914 	if (hw->dev_caps.supported_sensors & ICE_SENSOR_SUPPORT_E810_INT_TEMP) {
2915 		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_cap);
2916 		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_en);
2917 	}
2918 }
2919 
2920 /**
2921  * ice_if_multi_set - Callback to update Multicast filters in HW
2922  * @ctx: iflib ctx structure
2923  *
2924  * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
2925  * the if_multiaddrs list and determine which filters have been added or
2926  * removed from the list, and update HW programming to reflect the new list.
2927  *
2928  * @pre assumes the caller holds the iflib CTX lock
2929  */
2930 static void
2931 ice_if_multi_set(if_ctx_t ctx)
2932 {
2933 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2934 	int err;
2935 
2936 	ASSERT_CTX_LOCKED(sc);
2937 
2938 	/* Do not handle multicast configuration in recovery mode */
2939 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2940 		return;
2941 
2942 	err = ice_sync_multicast_filters(sc);
2943 	if (err) {
2944 		device_printf(sc->dev,
2945 			      "Failed to synchronize multicast filter list: %s\n",
2946 			      ice_err_str(err));
2947 		return;
2948 	}
2949 }
2950 
2951 /**
2952  * ice_if_vlan_register - Register a VLAN with the hardware
2953  * @ctx: iflib ctx pointer
2954  * @vtag: VLAN to add
2955  *
2956  * Programs the main PF VSI with a hardware filter for the given VLAN.
2957  *
2958  * @pre assumes the caller holds the iflib CTX lock
2959  */
2960 static void
2961 ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
2962 {
2963 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2964 	enum ice_status status;
2965 
2966 	ASSERT_CTX_LOCKED(sc);
2967 
2968 	/* Do not handle VLAN configuration in recovery mode */
2969 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2970 		return;
2971 
2972 	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
2973 	if (status) {
2974 		device_printf(sc->dev,
2975 			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
2976 			      vtag, ice_status_str(status),
2977 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2978 	}
2979 }
2980 
2981 /**
2982  * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
2983  * @ctx: iflib ctx pointer
2984  * @vtag: VLAN to add
2985  *
2986  * Removes the previously programmed VLAN filter from the main PF VSI.
2987  *
2988  * @pre assumes the caller holds the iflib CTX lock
2989  */
2990 static void
2991 ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
2992 {
2993 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2994 	enum ice_status status;
2995 
2996 	ASSERT_CTX_LOCKED(sc);
2997 
2998 	/* Do not handle VLAN configuration in recovery mode */
2999 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
3000 		return;
3001 
3002 	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
3003 	if (status) {
3004 		device_printf(sc->dev,
3005 			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
3006 			      vtag, ice_status_str(status),
3007 			      ice_aq_str(sc->hw.adminq.sq_last_status));
3008 	}
3009 }
3010 
3011 /**
3012  * ice_if_stop - Stop the device
3013  * @ctx: iflib context structure
3014  *
3015  * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
3016  * down)
3017  *
3018  * @pre assumes the caller holds the iflib CTX lock
3019  */
3020 static void
3021 ice_if_stop(if_ctx_t ctx)
3022 {
3023 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3024 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3025 
3026 	ASSERT_CTX_LOCKED(sc);
3027 
3028 	/*
3029 	 * The iflib core may call IFDI_STOP prior to the first call to
3030 	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
3031 	 * don't have, and disable Tx queues which aren't yet configured.
3032 	 * Although it is likely these extra operations are harmless, they do
3033 	 * cause spurious warning messages to be displayed, which may confuse
3034 	 * users.
3035 	 *
3036 	 * To avoid these messages, we use a state bit indicating if we've
3037 	 * been initialized. It will be set when ice_if_init is called, and
3038 	 * cleared here in ice_if_stop.
3039 	 */
3040 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
3041 		return;
3042 
3043 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
3044 		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
3045 		return;
3046 	}
3047 
3048 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
3049 		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
3050 		return;
3051 	}
3052 
3053 	ice_rdma_pf_stop(sc);
3054 
3055 	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
3056 	 * return of these functions because there's nothing we can really do
3057 	 * if they fail, and the functions already print error messages.
3058 	 * Just try to shut down as much as we can.
3059 	 */
3060 	ice_rm_pf_default_mac_filters(sc);
3061 
3062 	/* Dissociate the Tx and Rx queues from the interrupts */
3063 	ice_flush_txq_interrupts(&sc->pf_vsi);
3064 	ice_flush_rxq_interrupts(&sc->pf_vsi);
3065 
3066 	/* Disable the Tx and Rx queues */
3067 	ice_vsi_disable_tx(&sc->pf_vsi);
3068 	ice_control_all_rx_queues(&sc->pf_vsi, false);
3069 
3070 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
3071 		 !(if_getflags(sc->ifp) & IFF_UP) && sc->link_up)
3072 		ice_set_link(sc, false);
3073 
3074 	if (sc->mirr_if && ice_test_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
3075 		ice_subif_if_stop(sc->mirr_if->subctx);
3076 		device_printf(sc->dev, "The subinterface also comes down and up after reset\n");
3077 	}
3078 }
3079 
3080 /**
3081  * ice_if_get_counter - Get current value of an ifnet statistic
3082  * @ctx: iflib context pointer
3083  * @counter: ifnet counter to read
3084  *
3085  * Reads the current value of an ifnet counter for the device.
3086  *
3087  * This function is not protected by the iflib CTX lock.
3088  */
3089 static uint64_t
3090 ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
3091 {
3092 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3093 
3094 	/* Return the counter for the main PF VSI */
3095 	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
3096 }
3097 
3098 /**
3099  * ice_request_stack_reinit - Request that iflib re-initialize
3100  * @sc: the device private softc
3101  *
3102  * Request that the device be brought down and up, to re-initialize. For
3103  * example, this may be called when a device reset occurs, or when Tx and Rx
3104  * queues need to be re-initialized.
3105  *
3106  * This is required because the iflib state is outside the driver, and must be
3107  * re-initialized if we need to resart Tx and Rx queues.
3108  */
3109 void
3110 ice_request_stack_reinit(struct ice_softc *sc)
3111 {
3112 	if (CTX_ACTIVE(sc->ctx)) {
3113 		iflib_request_reset(sc->ctx);
3114 		iflib_admin_intr_deferred(sc->ctx);
3115 	}
3116 }
3117 
3118 /**
3119  * ice_driver_is_detaching - Check if the driver is detaching/unloading
3120  * @sc: device private softc
3121  *
3122  * Returns true if the driver is detaching, false otherwise.
3123  *
3124  * @remark on newer kernels, take advantage of iflib_in_detach in order to
3125  * report detachment correctly as early as possible.
3126  *
3127  * @remark this function is used by various code paths that want to avoid
3128  * running if the driver is about to be removed. This includes sysctls and
3129  * other driver access points. Note that it does not fully resolve
3130  * detach-based race conditions as it is possible for a thread to race with
3131  * iflib_in_detach.
3132  */
3133 bool
3134 ice_driver_is_detaching(struct ice_softc *sc)
3135 {
3136 	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
3137 		iflib_in_detach(sc->ctx));
3138 }
3139 
3140 /**
3141  * ice_if_priv_ioctl - Device private ioctl handler
3142  * @ctx: iflib context pointer
3143  * @command: The ioctl command issued
3144  * @data: ioctl specific data
3145  *
3146  * iflib callback for handling custom driver specific ioctls.
3147  *
3148  * @pre Assumes that the iflib context lock is held.
3149  */
3150 static int
3151 ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
3152 {
3153 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3154 	struct ifdrv *ifd;
3155 	device_t dev = sc->dev;
3156 
3157 	if (data == NULL)
3158 		return (EINVAL);
3159 
3160 	ASSERT_CTX_LOCKED(sc);
3161 
3162 	/* Make sure the command type is valid */
3163 	switch (command) {
3164 	case SIOCSDRVSPEC:
3165 	case SIOCGDRVSPEC:
3166 		/* Accepted commands */
3167 		break;
3168 	case SIOCGPRIVATE_0:
3169 		/*
3170 		 * Although we do not support this ioctl command, it's
3171 		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
3172 		 * handler. Do not print a message in this case
3173 		 */
3174 		return (ENOTSUP);
3175 	default:
3176 		/*
3177 		 * If we get a different command for this function, it's
3178 		 * definitely unexpected, so log a message indicating what
3179 		 * command we got for debugging purposes.
3180 		 */
3181 		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
3182 			      __func__, command);
3183 		return (EINVAL);
3184 	}
3185 
3186 	ifd = (struct ifdrv *)data;
3187 
3188 	switch (ifd->ifd_cmd) {
3189 	case ICE_NVM_ACCESS:
3190 		return ice_handle_nvm_access_ioctl(sc, ifd);
3191 	case ICE_DEBUG_DUMP:
3192 		return ice_handle_debug_dump_ioctl(sc, ifd);
3193 	default:
3194 		return EINVAL;
3195 	}
3196 }
3197 
3198 /**
3199  * ice_if_i2c_req - I2C request handler for iflib
3200  * @ctx: iflib context pointer
3201  * @req: The I2C parameters to use
3202  *
3203  * Read from the port's I2C eeprom using the parameters from the ioctl.
3204  *
3205  * @remark The iflib-only part is pretty simple.
3206  */
3207 static int
3208 ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
3209 {
3210 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3211 
3212 	return ice_handle_i2c_req(sc, req);
3213 }
3214 
3215 /**
3216  * ice_if_suspend - PCI device suspend handler for iflib
3217  * @ctx: iflib context pointer
3218  *
3219  * Deinitializes the driver and clears HW resources in preparation for
3220  * suspend or an FLR.
3221  *
3222  * @returns 0; this return value is ignored
3223  */
3224 static int
3225 ice_if_suspend(if_ctx_t ctx)
3226 {
3227 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3228 
3229 	/* At least a PFR is always going to happen after this;
3230 	 * either via FLR or during the D3->D0 transition.
3231 	 */
3232 	ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
3233 
3234 	ice_prepare_for_reset(sc);
3235 
3236 	return (0);
3237 }
3238 
3239 /**
3240  * ice_if_resume - PCI device resume handler for iflib
3241  * @ctx: iflib context pointer
3242  *
3243  * Reinitializes the driver and the HW after PCI resume or after
3244  * an FLR. An init is performed by iflib after this function is finished.
3245  *
3246  * @returns 0; this return value is ignored
3247  */
3248 static int
3249 ice_if_resume(if_ctx_t ctx)
3250 {
3251 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3252 
3253 	ice_rebuild(sc);
3254 
3255 	return (0);
3256 }
3257 
3258 /**
3259  * ice_if_needs_restart - Tell iflib when the driver needs to be reinitialized
3260  * @ctx: iflib context pointer
3261  * @event: event code to check
3262  *
3263  * Defaults to returning true for unknown events.
3264  *
3265  * @returns true if iflib needs to reinit the interface
3266  */
3267 static bool
3268 ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event)
3269 {
3270 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3271 
3272 	switch (event) {
3273 	case IFLIB_RESTART_VLAN_CONFIG:
3274 		if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
3275 			 !(if_getflags(sc->ifp) & IFF_UP))
3276 			return false;
3277 	default:
3278 		return true;
3279 	}
3280 }
3281 
3282 extern struct if_txrx ice_subif_txrx;
3283 
3284 /**
3285  * @var ice_subif_methods
3286  * @brief ice driver method entry points
3287  */
3288 static device_method_t ice_subif_methods[] = {
3289 	/* Device interface */
3290 	DEVMETHOD(device_register, ice_subif_register),
3291 	DEVMETHOD_END
3292 };
3293 
3294 /**
3295  * @var ice_subif_driver
3296  * @brief driver structure for the device API
3297  */
3298 static driver_t ice_subif_driver = {
3299 	.name = "ice_subif",
3300 	.methods = ice_subif_methods,
3301 	.size = sizeof(struct ice_mirr_if),
3302 };
3303 
3304 static device_method_t ice_iflib_subif_methods[] = {
3305 	DEVMETHOD(ifdi_attach_pre, ice_subif_if_attach_pre),
3306 	DEVMETHOD(ifdi_attach_post, ice_subif_if_attach_post),
3307 	DEVMETHOD(ifdi_tx_queues_alloc, ice_subif_if_tx_queues_alloc),
3308 	DEVMETHOD(ifdi_rx_queues_alloc, ice_subif_if_rx_queues_alloc),
3309 	DEVMETHOD(ifdi_msix_intr_assign, ice_subif_if_msix_intr_assign),
3310 	DEVMETHOD(ifdi_intr_enable, ice_subif_if_intr_enable),
3311 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_subif_if_rx_queue_intr_enable),
3312 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_subif_if_tx_queue_intr_enable),
3313 	DEVMETHOD(ifdi_init, ice_subif_if_init),
3314 	DEVMETHOD(ifdi_stop, ice_subif_if_stop),
3315 	DEVMETHOD(ifdi_queues_free, ice_subif_if_queues_free),
3316 	DEVMETHOD(ifdi_media_status, ice_subif_if_media_status),
3317 	DEVMETHOD(ifdi_promisc_set, ice_subif_if_promisc_set),
3318 };
3319 
3320 /**
3321  * @var ice_iflib_subif_driver
3322  * @brief driver structure for the iflib stack
3323  *
3324  * driver_t definition used to setup the iflib device methods.
3325  */
3326 static driver_t ice_iflib_subif_driver = {
3327 	.name = "ice_subif",
3328 	.methods = ice_iflib_subif_methods,
3329 	.size = sizeof(struct ice_mirr_if),
3330 };
3331 
3332 /**
3333  * @var ice_subif_sctx
3334  * @brief ice driver shared context
3335  *
3336  * Similar to the existing ice_sctx, this structure has these differences:
3337  * - isc_admin_intrcnt is set to 0
3338  * - Uses subif iflib driver methods
3339  * - Flagged as a VF for iflib
3340  */
3341 static struct if_shared_ctx ice_subif_sctx = {
3342 	.isc_magic = IFLIB_MAGIC,
3343 	.isc_q_align = PAGE_SIZE,
3344 
3345 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
3346 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
3347 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
3348 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
3349 
3350 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
3351 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
3352 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
3353 
3354 	.isc_nfl = 1,
3355 	.isc_ntxqs = 1,
3356 	.isc_nrxqs = 1,
3357 
3358 	.isc_admin_intrcnt = 0,
3359 	.isc_vendor_info = ice_vendor_info_array,
3360 	.isc_driver_version = __DECONST(char *, ice_driver_version),
3361 	.isc_driver = &ice_iflib_subif_driver,
3362 
3363 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
3364 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX |
3365 		IFLIB_IS_VF,
3366 
3367 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
3368 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
3369 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
3370 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
3371 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
3372 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
3373 };
3374 
3375 static void *
3376 ice_subif_register(device_t dev __unused)
3377 {
3378 	return (&ice_subif_sctx);
3379 }
3380 
3381 static void
3382 ice_subif_setup_scctx(struct ice_mirr_if *mif)
3383 {
3384 	if_softc_ctx_t scctx = mif->subscctx;
3385 
3386 	scctx->isc_txrx = &ice_subif_txrx;
3387 
3388 	scctx->isc_capenable = ICE_FULL_CAPS;
3389 	scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
3390 
3391 	scctx->isc_ntxqsets = 4;
3392 	scctx->isc_nrxqsets = 4;
3393 	scctx->isc_vectors = scctx->isc_nrxqsets;
3394 
3395 	scctx->isc_ntxqsets_max = 256;
3396 	scctx->isc_nrxqsets_max = 256;
3397 
3398 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
3399 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
3400 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
3401 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
3402 
3403 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
3404 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
3405 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
3406 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
3407 }
3408 
3409 static int
3410 ice_subif_if_attach_pre(if_ctx_t ctx)
3411 {
3412 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3413 	device_t dev = iflib_get_dev(ctx);
3414 
3415 	mif->subctx = ctx;
3416 	mif->subdev = dev;
3417 	mif->subscctx = iflib_get_softc_ctx(ctx);
3418 
3419 	/* Setup the iflib softc context structure */
3420 	ice_subif_setup_scctx(mif);
3421 
3422 	return (0);
3423 }
3424 
3425 static int
3426 ice_subif_if_attach_post(if_ctx_t ctx __unused)
3427 {
3428 	return (0);
3429 }
3430 
3431 /**
3432  * ice_destroy_mirror_interface - destroy mirror interface
3433  * @sc: driver private data
3434  *
3435  * Destroys all resources associated with the mirroring interface.
3436  * Will not exit early on failure.
3437  *
3438  * @pre: Mirror interface already exists and is initialized.
3439  */
3440 void
3441 ice_destroy_mirror_interface(struct ice_softc *sc)
3442 {
3443 	struct ice_mirr_if *mif = sc->mirr_if;
3444 	struct ice_vsi *vsi = mif->vsi;
3445 	bool is_locked = false;
3446 	int ret;
3447 
3448 	is_locked = sx_xlocked(sc->iflib_ctx_lock);
3449 	if (is_locked)
3450 		IFLIB_CTX_UNLOCK(sc);
3451 
3452 	if (mif->ifp) {
3453 		ret = iflib_device_deregister(mif->subctx);
3454 		if (ret) {
3455 			device_printf(sc->dev,
3456 			    "iflib_device_deregister for mirror interface failed: %d\n",
3457 			    ret);
3458 		}
3459 	}
3460 
3461 	bus_topo_lock();
3462 	ret = device_delete_child(sc->dev, mif->subdev);
3463 	bus_topo_unlock();
3464 	if (ret) {
3465 		device_printf(sc->dev,
3466 		    "device_delete_child for mirror interface failed: %d\n",
3467 		    ret);
3468 	}
3469 
3470 	if (is_locked)
3471 		IFLIB_CTX_LOCK(sc);
3472 
3473 	if (mif->if_imap) {
3474 		free(mif->if_imap, M_ICE);
3475 		mif->if_imap = NULL;
3476 	}
3477 	if (mif->os_imap) {
3478 		free(mif->os_imap, M_ICE);
3479 		mif->os_imap = NULL;
3480 	}
3481 
3482 	/* These are freed via ice_subif_queues_free_subif
3483 	 * vsi:
3484 	 * - rx_irqvs
3485 	 * - tx_queues
3486 	 * - rx_queues
3487 	 */
3488 	ice_release_vsi(vsi);
3489 
3490 	free(mif, M_ICE);
3491 	sc->mirr_if = NULL;
3492 
3493 }
3494 
3495 /**
3496  * ice_setup_mirror_vsi - Initialize mirror VSI
3497  * @mif: driver private data for mirror interface
3498  *
3499  * Allocates a VSI for a mirror interface, and sets that VSI up for use as a
3500  * mirror for the main PF VSI.
3501  *
3502  * Returns 0 on success, or a standard error code on failure.
3503  */
3504 static int
3505 ice_setup_mirror_vsi(struct ice_mirr_if *mif)
3506 {
3507 	struct ice_softc *sc = mif->back;
3508 	device_t dev = sc->dev;
3509 	struct ice_vsi *vsi;
3510 	int ret = 0;
3511 
3512 	/* vsi is for the new mirror vsi, not the PF's main VSI */
3513 	vsi = ice_alloc_vsi(sc, ICE_VSI_VMDQ2);
3514 	if (!vsi) {
3515 		/* Already prints an error message */
3516 		return (ENOMEM);
3517 	}
3518 	mif->vsi = vsi;
3519 
3520 	/* Reserve VSI queue allocation from PF queues */
3521 	ret = ice_alloc_vsi_qmap(vsi, ICE_DEFAULT_VF_QUEUES, ICE_DEFAULT_VF_QUEUES);
3522 	if (ret) {
3523 		device_printf(dev, "%s: Unable to allocate mirror VSI queue maps (%d queues): %s\n",
3524 		    __func__, ICE_DEFAULT_VF_QUEUES, ice_err_str(ret));
3525 		goto release_vsi;
3526 	}
3527 	vsi->num_tx_queues = vsi->num_rx_queues = ICE_DEFAULT_VF_QUEUES;
3528 
3529 	/* Assign Tx queues from PF space */
3530 	ret = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap,
3531 	    vsi->num_tx_queues);
3532 	if (ret) {
3533 		device_printf(dev, "Unable to assign mirror VSI Tx queues: %s\n",
3534 		    ice_err_str(ret));
3535 		goto release_vsi;
3536 	}
3537 	/* Assign Rx queues from PF space */
3538 	ret = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap,
3539 	    vsi->num_rx_queues);
3540 	if (ret) {
3541 		device_printf(dev, "Unable to assign mirror VSI Rx queues: %s\n",
3542 		    ice_err_str(ret));
3543 		goto release_vsi;
3544 	}
3545 	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
3546 	vsi->max_frame_size = ICE_MAX_FRAME_SIZE;
3547 
3548 	ret = ice_initialize_vsi(vsi);
3549 	if (ret) {
3550 		device_printf(dev, "%s: Error in ice_initialize_vsi for mirror VSI: %s\n",
3551 		    __func__, ice_err_str(ret));
3552 		goto release_vsi;
3553 	}
3554 
3555 	/* Setup this VSI for receiving traffic */
3556 	ret = ice_config_rss(vsi);
3557 	if (ret) {
3558 		device_printf(dev,
3559 		    "Unable to configure RSS for mirror VSI: %s\n",
3560 		    ice_err_str(ret));
3561 		goto release_vsi;
3562 	}
3563 
3564 	/* Set HW rules for mirroring traffic */
3565 	vsi->mirror_src_vsi = sc->pf_vsi.idx;
3566 
3567 	ice_debug(&sc->hw, ICE_DBG_INIT,
3568 	    "Configuring mirroring from VSI %d to %d\n",
3569 	    vsi->mirror_src_vsi, vsi->idx);
3570 	ice_debug(&sc->hw, ICE_DBG_INIT, "(HW num: VSI %d to %d)\n",
3571 	    ice_get_hw_vsi_num(&sc->hw, vsi->mirror_src_vsi),
3572 	    ice_get_hw_vsi_num(&sc->hw, vsi->idx));
3573 
3574 	ret = ice_setup_vsi_mirroring(vsi);
3575 	if (ret) {
3576 		device_printf(dev,
3577 		    "Unable to configure mirroring for VSI: %s\n",
3578 		    ice_err_str(ret));
3579 		goto release_vsi;
3580 	}
3581 
3582 	return (0);
3583 
3584 release_vsi:
3585 	ice_release_vsi(vsi);
3586 	mif->vsi = NULL;
3587 	return (ret);
3588 }
3589 
3590 /**
3591  * ice_create_mirror_interface - Initialize mirror interface
3592  * @sc: driver private data
3593  *
3594  * Creates and sets up a mirror interface that will mirror traffic from
3595  * the main PF interface. Includes a call to iflib_device_register() in order
3596  * to setup necessary iflib structures for this new interface as well.
3597  *
3598  * If it returns successfully, a new interface will be created and will show
3599  * up in the ifconfig interface list.
3600  *
3601  * Returns 0 on success, or a standard error code on failure.
3602  */
3603 int
3604 ice_create_mirror_interface(struct ice_softc *sc)
3605 {
3606 	device_t dev = sc->dev;
3607 	struct ice_mirr_if *mif;
3608 	struct ifmedia *media;
3609 	struct sbuf *sb;
3610 	int ret = 0;
3611 
3612 	mif = (struct ice_mirr_if *)malloc(sizeof(*mif), M_ICE, M_ZERO | M_NOWAIT);
3613 	if (!mif) {
3614 		device_printf(dev, "malloc() error allocating mirror interface\n");
3615 		return (ENOMEM);
3616 	}
3617 
3618 	/* Set pointers */
3619 	sc->mirr_if = mif;
3620 	mif->back = sc;
3621 
3622 	/* Do early setup because these will be called during iflib_device_register():
3623 	 * - ice_subif_if_tx_queues_alloc
3624 	 * - ice_subif_if_rx_queues_alloc
3625 	 */
3626 	ret = ice_setup_mirror_vsi(mif);
3627 	if (ret)
3628 		goto out;
3629 
3630 	/* Determine name for new interface:
3631 	 * (base interface name)(modifier name)(modifier unit number)
3632 	 * e.g. for ice0 with a new mirror interface (modifier m)
3633 	 * of index 0, this equals "ice0m0"
3634 	 */
3635 	sb = sbuf_new_auto();
3636 	MPASS(sb != NULL);
3637 	sbuf_printf(sb, "%sm", device_get_nameunit(dev));
3638 	sbuf_finish(sb);
3639 
3640 	bus_topo_lock();
3641 	mif->subdev = device_add_child(dev, sbuf_data(sb), 0);
3642 	bus_topo_unlock();
3643 
3644 	if (!mif->subdev) {
3645 		device_printf(dev, "device_add_child failed for %s0\n", sbuf_data(sb));
3646 		sbuf_delete(sb);
3647 		free(mif, M_ICE);
3648 		sc->mirr_if = NULL;
3649 		return (ENOMEM);
3650 	}
3651 	sbuf_delete(sb);
3652 
3653 	device_set_driver(mif->subdev, &ice_subif_driver);
3654 
3655 	/* Use iflib_device_register() directly because the driver already
3656 	 * has an initialized softc to pass to iflib
3657 	 */
3658 	ret = iflib_device_register(mif->subdev, mif, &ice_subif_sctx, &mif->subctx);
3659 	if (ret)
3660 		goto out;
3661 
3662 	/* Indicate that created interface will be just for monitoring */
3663 	mif->ifp = iflib_get_ifp(mif->subctx);
3664 	if_setflagbits(mif->ifp, IFF_MONITOR, 0);
3665 
3666 	/* Use autoselect media by default */
3667 	media = iflib_get_media(mif->subctx);
3668 	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
3669 	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
3670 
3671 	device_printf(dev, "Created dev %s and ifnet %s for mirroring\n",
3672 	    device_get_nameunit(mif->subdev), if_name(mif->ifp));
3673 
3674 	ice_add_vsi_sysctls(mif->vsi);
3675 
3676 	ret = ice_wire_mirror_intrs(mif);
3677 	if (ret)
3678 		goto out;
3679 
3680 	mif->if_attached = true;
3681 	return (0);
3682 
3683 out:
3684 	ice_destroy_mirror_interface(sc);
3685 	return (ret);
3686 }
3687 
3688 /**
3689  * ice_wire_mirror_intrs
3690  * @mif: driver private subinterface structure
3691  *
3692  * Helper function that sets up driver interrupt data and calls
3693  * into iflib in order to setup interrupts in its data structures as well.
3694  *
3695  * Like ice_if_msix_intr_assign, currently requires that we get at least the same
3696  * number of vectors as we have queues, and that we always have the same number
3697  * of Tx and Rx queues. Unlike that function, this calls a special
3698  * iflib_irq_alloc_generic_subif() function for RX interrupts because the
3699  * driver needs to get MSI-X resources from the parent device.
3700  *
3701  * Tx queues use a softirq instead of using their own hardware interrupt so that
3702  * remains unchanged.
3703  *
3704  * Returns 0 on success or an error code from iflib_irq_alloc_generic_subctx()
3705  * on failure.
3706  */
3707 static int
3708 ice_wire_mirror_intrs(struct ice_mirr_if *mif)
3709 {
3710 	struct ice_softc *sc = mif->back;
3711 	struct ice_hw *hw = &sc->hw;
3712 	struct ice_vsi *vsi = mif->vsi;
3713 	device_t dev = mif->subdev;
3714 	int err, i, rid;
3715 
3716 	if_ctx_t ctx = mif->subctx;
3717 
3718 	ice_debug(hw, ICE_DBG_INIT, "%s: Last rid: %d\n", __func__, sc->last_rid);
3719 
3720 	rid = sc->last_rid + 1;
3721 	for (i = 0; i < vsi->num_rx_queues; i++, rid++) {
3722 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
3723 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
3724 		char irq_name[16];
3725 
3726 		// TODO: Change to use dynamic interface number
3727 		snprintf(irq_name, sizeof(irq_name), "m0rxq%d", i);
3728 		/* First arg is parent device (physical port's) iflib ctx */
3729 		err = iflib_irq_alloc_generic_subctx(sc->ctx, ctx,
3730 		    &mif->rx_irqvs[i].irq, rid, IFLIB_INTR_RXTX, ice_msix_que,
3731 		    rxq, rxq->me, irq_name);
3732 		if (err) {
3733 			device_printf(dev,
3734 			    "Failed to allocate q int %d err: %s\n",
3735 			    i, ice_err_str(err));
3736 			i--;
3737 			goto fail;
3738 		}
3739 		MPASS(rid - 1 > 0);
3740 		/* Set vector number used in interrupt enable/disable functions */
3741 		mif->rx_irqvs[i].me = rid - 1;
3742 		rxq->irqv = &mif->rx_irqvs[i];
3743 
3744 		bzero(irq_name, sizeof(irq_name));
3745 		snprintf(irq_name, sizeof(irq_name), "m0txq%d", i);
3746 		iflib_softirq_alloc_generic(ctx, &mif->rx_irqvs[i].irq,
3747 		    IFLIB_INTR_TX, txq, txq->me, irq_name);
3748 		txq->irqv = &mif->rx_irqvs[i];
3749 	}
3750 
3751 	sc->last_rid = rid - 1;
3752 
3753 	ice_debug(hw, ICE_DBG_INIT, "%s: New last rid: %d\n", __func__,
3754 	    sc->last_rid);
3755 
3756 	return (0);
3757 
3758 fail:
3759 	for (; i >= 0; i--)
3760 		iflib_irq_free(ctx, &mif->rx_irqvs[i].irq);
3761 	return (err);
3762 }
3763 
3764 /**
3765  * ice_subif_rebuild - Rebuild subinterface post reset
3766  * @sc: The device private softc
3767  *
3768  * Restore subinterface state after a reset occurred.
3769  * Restart the VSI and enable the mirroring.
3770  */
3771 static int
3772 ice_subif_rebuild(struct ice_softc *sc)
3773 {
3774 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(sc->ctx);
3775 	struct ice_vsi *vsi = sc->mirr_if->vsi;
3776 	int err;
3777 
3778 	err = ice_subif_rebuild_vsi_qmap(sc);
3779 	if (err) {
3780 		device_printf(sc->dev, "Unable to re-assign mirror VSI queues, err %s\n",
3781 		      ice_err_str(err));
3782 		return (err);
3783 	}
3784 
3785 	err = ice_initialize_vsi(vsi);
3786 	if (err) {
3787 		device_printf(sc->dev, "Unable to re-initialize mirror VSI, err %s\n",
3788 		      ice_err_str(err));
3789 		goto err_release_queue_allocations_subif;
3790 	}
3791 
3792 	err = ice_config_rss(vsi);
3793 	if (err) {
3794 		device_printf(sc->dev,
3795 		      "Unable to reconfigure RSS for the mirror VSI, err %s\n",
3796 		      ice_err_str(err));
3797 		goto err_deinit_subif_vsi;
3798 	}
3799 
3800 	vsi->mirror_src_vsi = sc->pf_vsi.idx;
3801 
3802 	err = ice_setup_vsi_mirroring(vsi);
3803 	if (err) {
3804 		device_printf(sc->dev,
3805 		      "Unable to configure mirroring for VSI: %s\n",
3806 		      ice_err_str(err));
3807 		goto err_deinit_subif_vsi;
3808 	}
3809 
3810 	ice_set_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT);
3811 
3812 	return (0);
3813 
3814 err_deinit_subif_vsi:
3815 	ice_deinit_vsi(vsi);
3816 err_release_queue_allocations_subif:
3817 	ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap,
3818 	    sc->mirr_if->num_irq_vectors);
3819 	ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap,
3820 	    sc->mirr_if->num_irq_vectors);
3821 
3822 	return (err);
3823 }
3824 
3825 /**
3826  * ice_subif_rebuild_vsi_qmap - Rebuild the mirror VSI queue mapping
3827  * @sc: the device softc pointer
3828  *
3829  * Loops over the Tx and Rx queues for the mirror VSI and reassigns the queue
3830  * mapping after a reset occurred.
3831  */
3832 static int
3833 ice_subif_rebuild_vsi_qmap(struct ice_softc *sc)
3834 {
3835 	struct ice_vsi *vsi = sc->mirr_if->vsi;
3836 	struct ice_tx_queue *txq;
3837 	struct ice_rx_queue *rxq;
3838 	int err, i;
3839 
3840 	err = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap, sc->mirr_if->num_irq_vectors);
3841 	if (err) {
3842 		device_printf(sc->dev, "Unable to assign mirror VSI Tx queues: %s\n",
3843 		      ice_err_str(err));
3844 		return (err);
3845 	}
3846 
3847 	err = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap, sc->mirr_if->num_irq_vectors);
3848 	if (err) {
3849 		device_printf(sc->dev, "Unable to assign mirror VSI Rx queues: %s\n",
3850 		      ice_err_str(err));
3851 		goto err_release_tx_queues;
3852 	}
3853 
3854 	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
3855 
3856 	/* Re-assign Tx queue tail pointers */
3857 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
3858 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
3859 
3860 	/* Re-assign Rx queue tail pointers */
3861 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
3862 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
3863 
3864 	return (0);
3865 
3866 err_release_tx_queues:
3867 	ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues);
3868 
3869 	return (err);
3870 }
3871 
3872 /**
3873  * ice_subif_if_tx_queues_alloc - Allocate Tx queue memory for subinterfaces
3874  * @ctx: iflib context structure
3875  * @vaddrs: virtual addresses for the queue memory
3876  * @paddrs: physical addresses for the queue memory
3877  * @ntxqs: the number of Tx queues per set (should always be 1)
3878  * @ntxqsets: the number of Tx queue sets to allocate
3879  *
3880  * See ice_if_tx_queues_alloc() description. Similar to that function, but
3881  * for subinterfaces instead.
3882  */
3883 static int
3884 ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
3885 			     int __invariant_only ntxqs, int ntxqsets)
3886 {
3887 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3888 	struct ice_tx_queue *txq;
3889 	device_t dev = mif->subdev;
3890 	struct ice_vsi *vsi;
3891 	int err, i, j;
3892 
3893 	MPASS(mif != NULL);
3894 	MPASS(ntxqs == 1);
3895 	MPASS(mif->subscctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
3896 
3897 	vsi = mif->vsi;
3898 
3899 	MPASS(vsi->num_tx_queues == ntxqsets);
3900 
3901 	/* Allocate queue structure memory */
3902 	if (!(vsi->tx_queues =
3903 	      (struct ice_tx_queue *)malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
3904 		device_printf(dev, "%s: Unable to allocate Tx queue memory for subfunction\n",
3905 		    __func__);
3906 		return (ENOMEM);
3907 	}
3908 
3909 	/* Allocate report status arrays */
3910 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3911 		if (!(txq->tx_rsq =
3912 		      (uint16_t *)malloc(sizeof(uint16_t) * mif->subscctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
3913 			device_printf(dev,
3914 			    "%s: Unable to allocate tx_rsq memory for subfunction\n", __func__);
3915 			err = ENOMEM;
3916 			goto free_tx_queues;
3917 		}
3918 		/* Initialize report status array */
3919 		for (j = 0; j < mif->subscctx->isc_ntxd[0]; j++)
3920 			txq->tx_rsq[j] = QIDX_INVALID;
3921 	}
3922 
3923 	/* Add Tx queue sysctls context */
3924 	ice_vsi_add_txqs_ctx(vsi);
3925 
3926 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3927 		/* q_handle == me when only one TC */
3928 		txq->me = txq->q_handle = i;
3929 		txq->vsi = vsi;
3930 
3931 		/* store the queue size for easier access */
3932 		txq->desc_count = mif->subscctx->isc_ntxd[0];
3933 
3934 		/* get the virtual and physical address of the hardware queues */
3935 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
3936 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
3937 		txq->tx_paddr = paddrs[i];
3938 
3939 		ice_add_txq_sysctls(txq);
3940 	}
3941 
3942 	return (0);
3943 
3944 free_tx_queues:
3945 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3946 		if (txq->tx_rsq != NULL) {
3947 			free(txq->tx_rsq, M_ICE);
3948 			txq->tx_rsq = NULL;
3949 		}
3950 	}
3951 	free(vsi->tx_queues, M_ICE);
3952 	vsi->tx_queues = NULL;
3953 	return (err);
3954 }
3955 
3956 /**
3957  * ice_subif_if_rx_queues_alloc - Allocate Rx queue memory for subinterfaces
3958  * @ctx: iflib context structure
3959  * @vaddrs: virtual addresses for the queue memory
3960  * @paddrs: physical addresses for the queue memory
3961  * @nrxqs: number of Rx queues per set (should always be 1)
3962  * @nrxqsets: number of Rx queue sets to allocate
3963  *
3964  * See ice_if_rx_queues_alloc() for general summary; this is similar to that
3965  * but implemented for subinterfaces.
3966  */
3967 static int
3968 ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
3969     int __invariant_only nrxqs, int nrxqsets)
3970 {
3971 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3972 	struct ice_rx_queue *rxq;
3973 	device_t dev = mif->subdev;
3974 	struct ice_vsi *vsi;
3975 	int i;
3976 
3977 	MPASS(mif != NULL);
3978 	MPASS(nrxqs == 1);
3979 	MPASS(mif->subscctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
3980 
3981 	vsi = mif->vsi;
3982 
3983 	MPASS(vsi->num_rx_queues == nrxqsets);
3984 
3985 	/* Allocate queue structure memory */
3986 	if (!(vsi->rx_queues =
3987 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
3988 		device_printf(dev, "%s: Unable to allocate Rx queue memory for subfunction\n",
3989 		    __func__);
3990 		return (ENOMEM);
3991 	}
3992 
3993 	/* Add Rx queue sysctls context */
3994 	ice_vsi_add_rxqs_ctx(vsi);
3995 
3996 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
3997 		rxq->me = i;
3998 		rxq->vsi = vsi;
3999 
4000 		/* store the queue size for easier access */
4001 		rxq->desc_count = mif->subscctx->isc_nrxd[0];
4002 
4003 		/* get the virtual and physical address of the hardware queues */
4004 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
4005 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
4006 		rxq->rx_paddr = paddrs[i];
4007 
4008 		ice_add_rxq_sysctls(rxq);
4009 	}
4010 
4011 	return (0);
4012 }
4013 
4014 /**
4015  * ice_subif_if_msix_intr_assign - Assign MSI-X interrupts to new sub interface
4016  * @ctx: the iflib context structure
4017  * @msix: the number of vectors we were assigned
4018  *
4019  * Allocates and assigns driver private resources for MSI-X interrupt tracking.
4020  *
4021  * @pre OS MSI-X resources have been pre-allocated by parent interface.
4022  */
4023 static int
4024 ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix)
4025 {
4026 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4027 	struct ice_softc *sc = mif->back;
4028 	struct ice_vsi *vsi = mif->vsi;
4029 
4030 	device_t dev = mif->subdev;
4031 	int ret;
4032 
4033 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
4034 		device_printf(dev,
4035 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
4036 			      vsi->num_tx_queues, vsi->num_rx_queues);
4037 		return (EOPNOTSUPP);
4038 	}
4039 
4040 	if (msix > sc->extra_vectors) {
4041 		device_printf(dev,
4042 		     "%s: Not enough spare (%d) msix vectors for new sub-interface requested (%d)\n",
4043 		     __func__, sc->extra_vectors, msix);
4044 		return (ENOSPC);
4045 	}
4046 	device_printf(dev, "%s: Using %d vectors for sub-interface\n", __func__,
4047 	    msix);
4048 
4049 	/* Allocate space to store the IRQ vector data */
4050 	mif->num_irq_vectors = vsi->num_rx_queues;
4051 	mif->rx_irqvs = (struct ice_irq_vector *)
4052 	    malloc(sizeof(struct ice_irq_vector) * (mif->num_irq_vectors),
4053 		   M_ICE, M_NOWAIT);
4054 	if (!mif->rx_irqvs) {
4055 		device_printf(dev,
4056 			      "Unable to allocate RX irqv memory for mirror's %d vectors\n",
4057 			      mif->num_irq_vectors);
4058 		return (ENOMEM);
4059 	}
4060 
4061 	/* Assign mirror interface interrupts from PF device space */
4062 	if (!(mif->if_imap =
4063 	      (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors,
4064 	      M_ICE, M_NOWAIT))) {
4065 		device_printf(dev, "Unable to allocate mirror intfc if_imap memory\n");
4066 		ret = ENOMEM;
4067 		goto free_irqvs;
4068 	}
4069 	ret = ice_resmgr_assign_contiguous(&sc->dev_imgr, mif->if_imap, mif->num_irq_vectors);
4070 	if (ret) {
4071 		device_printf(dev, "Unable to assign mirror intfc PF device interrupt mapping: %s\n",
4072 			      ice_err_str(ret));
4073 		goto free_if_imap;
4074 	}
4075 	/* Assign mirror interface interrupts from OS interrupt allocation space */
4076 	if (!(mif->os_imap =
4077 	      (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors,
4078 	      M_ICE, M_NOWAIT))) {
4079 		device_printf(dev, "Unable to allocate mirror intfc os_imap memory\n");
4080 		ret = ENOMEM;
4081 		goto free_if_imap;
4082 	}
4083 	ret = ice_resmgr_assign_contiguous(&sc->os_imgr, mif->os_imap, mif->num_irq_vectors);
4084 	if (ret) {
4085 		device_printf(dev, "Unable to assign mirror intfc OS interrupt mapping: %s\n",
4086 			      ice_err_str(ret));
4087 		goto free_if_imap;
4088 	}
4089 
4090 	return (0);
4091 
4092 free_if_imap:
4093 	free(mif->if_imap, M_ICE);
4094 	mif->if_imap = NULL;
4095 free_irqvs:
4096 	free(mif->rx_irqvs, M_ICE);
4097 	mif->rx_irqvs = NULL;
4098 	return (ret);
4099 }
4100 
4101 /**
4102  * ice_subif_if_intr_enable - Enable device interrupts for a subinterface
4103  * @ctx: iflib context structure
4104  *
4105  * Called by iflib to request enabling all interrupts that belong to a
4106  * subinterface.
4107  */
4108 static void
4109 ice_subif_if_intr_enable(if_ctx_t ctx)
4110 {
4111 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4112 	struct ice_softc *sc = mif->back;
4113 	struct ice_vsi *vsi = mif->vsi;
4114 	struct ice_hw *hw = &sc->hw;
4115 
4116 	/* Do not enable queue interrupts in recovery mode */
4117 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4118 		return;
4119 
4120 	/* Enable all queue interrupts */
4121 	for (int i = 0; i < vsi->num_rx_queues; i++)
4122 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
4123 }
4124 
4125 /**
4126  * ice_subif_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
4127  * @ctx: iflib context structure
4128  * @rxqid: the Rx queue to enable
4129  *
4130  * Enable a specific Rx queue interrupt.
4131  *
4132  * This function is not protected by the iflib CTX lock.
4133  */
4134 static int
4135 ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
4136 {
4137 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4138 	struct ice_softc *sc = mif->back;
4139 	struct ice_vsi *vsi = mif->vsi;
4140 	struct ice_hw *hw = &sc->hw;
4141 
4142 	/* Do not enable queue interrupts in recovery mode */
4143 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4144 		return (ENOSYS);
4145 
4146 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
4147 	return (0);
4148 }
4149 
4150 /**
4151  * ice_subif_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
4152  * @ctx: iflib context structure
4153  * @txqid: the Tx queue to enable
4154  *
4155  * Enable a specific Tx queue interrupt.
4156  *
4157  * This function is not protected by the iflib CTX lock.
4158  */
4159 static int
4160 ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
4161 {
4162 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4163 	struct ice_softc *sc = mif->back;
4164 	struct ice_vsi *vsi = mif->vsi;
4165 	struct ice_hw *hw = &sc->hw;
4166 
4167 	/* Do not enable queue interrupts in recovery mode */
4168 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4169 		return (ENOSYS);
4170 
4171 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
4172 	return (0);
4173 }
4174 
4175 /**
4176  * ice_subif_if_init - Initialize the subinterface
4177  * @ctx: iflib ctx structure
4178  *
4179  * Called by iflib to bring the device up, i.e. ifconfig ice0m0 up.
4180  * Prepares the Tx and Rx engines and enables interrupts.
4181  *
4182  * @pre assumes the caller holds the iflib CTX lock
4183  */
4184 static void
4185 ice_subif_if_init(if_ctx_t ctx)
4186 {
4187 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4188 	struct ice_softc *sc = mif->back;
4189 	struct ice_vsi *vsi = mif->vsi;
4190 	device_t dev = mif->subdev;
4191 	int err;
4192 
4193 	if (ice_driver_is_detaching(sc))
4194 		return;
4195 
4196 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4197 		return;
4198 
4199 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
4200 		device_printf(dev,
4201 		    "request to start interface cannot be completed as the parent device %s failed to reset\n",
4202 		    device_get_nameunit(sc->dev));
4203 		return;
4204 	}
4205 
4206 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
4207 		device_printf(dev,
4208 		    "request to start interface cannot be completed while parent device %s is prepared for impending reset\n",
4209 		    device_get_nameunit(sc->dev));
4210 		return;
4211 	}
4212 
4213 	/* XXX: Equiv to ice_update_rx_mbuf_sz */
4214 	vsi->mbuf_sz = iflib_get_rx_mbuf_sz(ctx);
4215 
4216 	/* Initialize software Tx tracking values */
4217 	ice_init_tx_tracking(vsi);
4218 
4219 	err = ice_cfg_vsi_for_tx(vsi);
4220 	if (err) {
4221 		device_printf(dev,
4222 			      "Unable to configure subif VSI for Tx: %s\n",
4223 			      ice_err_str(err));
4224 		return;
4225 	}
4226 
4227 	err = ice_cfg_vsi_for_rx(vsi);
4228 	if (err) {
4229 		device_printf(dev,
4230 			      "Unable to configure subif VSI for Rx: %s\n",
4231 			      ice_err_str(err));
4232 		goto err_cleanup_tx;
4233 	}
4234 
4235 	err = ice_control_all_rx_queues(vsi, true);
4236 	if (err) {
4237 		device_printf(dev,
4238 			      "Unable to enable subif Rx rings for receive: %s\n",
4239 			      ice_err_str(err));
4240 		goto err_cleanup_tx;
4241 	}
4242 
4243 	ice_configure_all_rxq_interrupts(vsi);
4244 	ice_configure_rx_itr(vsi);
4245 
4246 	ice_set_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
4247 	return;
4248 
4249 err_cleanup_tx:
4250 	ice_vsi_disable_tx(vsi);
4251 }
4252 
4253 /**
4254  * ice_if_stop_subif - Stop the subinterface
4255  * @ctx: iflib context structure
4256  * @ifs: subinterface context structure
4257  *
4258  * Called by iflib to stop the subinterface and bring it down.
4259  * (e.g. ifconfig ice0m0 down)
4260  *
4261  * @pre assumes the caller holds the iflib CTX lock
4262  */
4263 static void
4264 ice_subif_if_stop(if_ctx_t ctx)
4265 {
4266 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4267 	struct ice_softc *sc = mif->back;
4268 	struct ice_vsi *vsi = mif->vsi;
4269 	device_t dev = mif->subdev;
4270 
4271 	if (!ice_testandclear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED))
4272 		return;
4273 
4274 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
4275 		device_printf(dev,
4276 		    "request to stop interface cannot be completed as the parent device %s failed to reset\n",
4277 		    device_get_nameunit(sc->dev));
4278 		return;
4279 	}
4280 
4281 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
4282 		device_printf(dev,
4283 		    "request to stop interface cannot be completed while parent device %s is prepared for impending reset\n",
4284 		    device_get_nameunit(sc->dev));
4285 		return;
4286 	}
4287 
4288 	/* Dissociate the Tx and Rx queues from the interrupts */
4289 	ice_flush_txq_interrupts(vsi);
4290 	ice_flush_rxq_interrupts(vsi);
4291 
4292 	/* Disable the Tx and Rx queues */
4293 	ice_vsi_disable_tx(vsi);
4294 	ice_control_all_rx_queues(vsi, false);
4295 }
4296 
4297 /**
4298  * ice_free_irqvs_subif - Free IRQ vector memory for subinterfaces
4299  * @mif: Mirror interface private structure
4300  *
4301  * Free IRQ vector memory allocated during ice_subif_if_msix_intr_assign.
4302  */
4303 static void
4304 ice_free_irqvs_subif(struct ice_mirr_if *mif)
4305 {
4306 	struct ice_softc *sc = mif->back;
4307 	struct ice_vsi *vsi = mif->vsi;
4308 	if_ctx_t ctx = sc->ctx;
4309 	int i;
4310 
4311 	/* If the irqvs array is NULL, then there are no vectors to free */
4312 	if (mif->rx_irqvs == NULL)
4313 		return;
4314 
4315 	/* Free the IRQ vectors -- currently subinterfaces have number
4316 	 * of vectors equal to number of RX queues
4317 	 *
4318 	 * XXX: ctx is parent device's ctx, not the subinterface ctx
4319 	 */
4320 	for (i = 0; i < vsi->num_rx_queues; i++)
4321 		iflib_irq_free(ctx, &mif->rx_irqvs[i].irq);
4322 
4323 	ice_resmgr_release_map(&sc->os_imgr, mif->os_imap,
4324 	    mif->num_irq_vectors);
4325 	ice_resmgr_release_map(&sc->dev_imgr, mif->if_imap,
4326 	    mif->num_irq_vectors);
4327 
4328 	sc->last_rid -= vsi->num_rx_queues;
4329 
4330 	/* Clear the irqv pointers */
4331 	for (i = 0; i < vsi->num_rx_queues; i++)
4332 		vsi->rx_queues[i].irqv = NULL;
4333 
4334 	for (i = 0; i < vsi->num_tx_queues; i++)
4335 		vsi->tx_queues[i].irqv = NULL;
4336 
4337 	/* Release the vector array memory */
4338 	free(mif->rx_irqvs, M_ICE);
4339 	mif->rx_irqvs = NULL;
4340 }
4341 
4342 /**
4343  * ice_subif_if_queues_free - Free queue memory for subinterfaces
4344  * @ctx: the iflib context structure
4345  *
4346  * Free queue memory allocated by ice_subif_tx_queues_alloc() and
4347  * ice_subif_if_rx_queues_alloc().
4348  */
4349 static void
4350 ice_subif_if_queues_free(if_ctx_t ctx)
4351 {
4352 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4353 	struct ice_vsi *vsi = mif->vsi;
4354 	struct ice_tx_queue *txq;
4355 	int i;
4356 
4357 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
4358 	 * pointers.
4359 	 */
4360 	ice_vsi_del_txqs_ctx(vsi);
4361 	ice_vsi_del_rxqs_ctx(vsi);
4362 
4363 	/* Release MSI-X IRQ vectors */
4364 	ice_free_irqvs_subif(mif);
4365 
4366 	if (vsi->tx_queues != NULL) {
4367 		/* free the tx_rsq arrays */
4368 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
4369 			if (txq->tx_rsq != NULL) {
4370 				free(txq->tx_rsq, M_ICE);
4371 				txq->tx_rsq = NULL;
4372 			}
4373 		}
4374 		free(vsi->tx_queues, M_ICE);
4375 		vsi->tx_queues = NULL;
4376 	}
4377 	if (vsi->rx_queues != NULL) {
4378 		free(vsi->rx_queues, M_ICE);
4379 		vsi->rx_queues = NULL;
4380 	}
4381 }
4382 
4383 /**
4384  * ice_subif_if_media_status - Report subinterface media
4385  * @ctx: iflib context structure
4386  * @ifmr: ifmedia request structure to update
4387  *
4388  * Updates the provided ifmr with something, in order to prevent a
4389  * "no media types?" message from ifconfig.
4390  *
4391  * Mirror interfaces are always up.
4392  */
4393 static void
4394 ice_subif_if_media_status(if_ctx_t ctx __unused, struct ifmediareq *ifmr)
4395 {
4396 	ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE;
4397 	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
4398 }
4399 
4400 /**
4401  * ice_subif_if_promisc_set - Set subinterface promiscuous mode
4402  * @ctx: iflib context structure
4403  * @flags: promiscuous flags to configure
4404  *
4405  * Called by iflib to configure device promiscuous mode.
4406  *
4407  * @remark This does not need to be implemented for now.
4408  */
4409 static int
4410 ice_subif_if_promisc_set(if_ctx_t ctx __unused, int flags __unused)
4411 {
4412 	return (0);
4413 }
4414 
4415