xref: /freebsd/sys/dev/ice/if_ice_iflib.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2024, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file if_ice_iflib.c
34  * @brief iflib driver implementation
35  *
36  * Contains the main entry point for the iflib driver implementation. It
37  * implements the various ifdi driver methods, and sets up the module and
38  * driver values to load an iflib driver.
39  */
40 
41 #include "ice_iflib.h"
42 #include "ice_drv_info.h"
43 #include "ice_switch.h"
44 #include "ice_sched.h"
45 
46 #include <sys/module.h>
47 #include <sys/sockio.h>
48 #include <sys/smp.h>
49 #include <dev/pci/pcivar.h>
50 #include <dev/pci/pcireg.h>
51 
52 /*
53  * Device method prototypes
54  */
55 
56 static void *ice_register(device_t);
57 static int  ice_if_attach_pre(if_ctx_t);
58 static int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
59 static int  ice_if_attach_post(if_ctx_t);
60 static void ice_attach_post_recovery_mode(struct ice_softc *sc);
61 static int  ice_if_detach(if_ctx_t);
62 static int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
63 static int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets);
64 static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
65 static void ice_if_queues_free(if_ctx_t ctx);
66 static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
67 static void ice_if_intr_enable(if_ctx_t ctx);
68 static void ice_if_intr_disable(if_ctx_t ctx);
69 static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
70 static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
71 static int ice_if_promisc_set(if_ctx_t ctx, int flags);
72 static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
73 static int ice_if_media_change(if_ctx_t ctx);
74 static void ice_if_init(if_ctx_t ctx);
75 static void ice_if_timer(if_ctx_t ctx, uint16_t qid);
76 static void ice_if_update_admin_status(if_ctx_t ctx);
77 static void ice_if_multi_set(if_ctx_t ctx);
78 static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
79 static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
80 static void ice_if_stop(if_ctx_t ctx);
81 static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
82 static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
83 static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
84 static int ice_if_suspend(if_ctx_t ctx);
85 static int ice_if_resume(if_ctx_t ctx);
86 static bool ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event);
87 static void ice_init_link(struct ice_softc *sc);
88 static int ice_setup_mirror_vsi(struct ice_mirr_if *mif);
89 static int ice_wire_mirror_intrs(struct ice_mirr_if *mif);
90 static void ice_free_irqvs_subif(struct ice_mirr_if *mif);
91 static void *ice_subif_register(device_t);
92 static void ice_subif_setup_scctx(struct ice_mirr_if *mif);
93 static int ice_subif_rebuild(struct ice_softc *sc);
94 static int ice_subif_rebuild_vsi_qmap(struct ice_softc *sc);
95 
96 /* Iflib API */
97 static int ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
98     uint64_t *paddrs, int ntxqs, int ntxqsets);
99 static int ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
100     uint64_t *paddrs, int nrxqs, int nrxqsets);
101 static int ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
102 static int ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
103 static void ice_subif_if_intr_enable(if_ctx_t ctx);
104 static int ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix);
105 static void ice_subif_if_init(if_ctx_t ctx);
106 static void ice_subif_if_stop(if_ctx_t ctx);
107 static void ice_subif_if_queues_free(if_ctx_t ctx);
108 static int ice_subif_if_attach_pre(if_ctx_t);
109 static int ice_subif_if_attach_post(if_ctx_t);
110 static void ice_subif_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
111 static int ice_subif_if_promisc_set(if_ctx_t ctx, int flags);
112 
113 static int ice_msix_que(void *arg);
114 static int ice_msix_admin(void *arg);
115 
/*
 * Helper function prototypes
 *
 * Forward declarations for file-local (static) helper routines used by the
 * device methods: PCI BAR mapping, link status reporting, reset handling and
 * rebuild, IRQ vector management, and iflib context setup.
 */
static int ice_pci_mapping(struct ice_softc *sc);
static void ice_free_pci_mapping(struct ice_softc *sc);
static void ice_update_link_status(struct ice_softc *sc, bool update_media);
static void ice_init_device_features(struct ice_softc *sc);
static void ice_init_tx_tracking(struct ice_vsi *vsi);
static void ice_handle_reset_event(struct ice_softc *sc);
static void ice_handle_pf_reset_request(struct ice_softc *sc);
static void ice_prepare_for_reset(struct ice_softc *sc);
static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
static void ice_rebuild(struct ice_softc *sc);
static void ice_rebuild_recovery_mode(struct ice_softc *sc);
static void ice_free_irqvs(struct ice_softc *sc);
static void ice_update_rx_mbuf_sz(struct ice_softc *sc);
static void ice_poll_for_media_avail(struct ice_softc *sc);
static void ice_setup_scctx(struct ice_softc *sc);
static int ice_allocate_msix(struct ice_softc *sc);
/* Callout handler; armed with callout_reset() in the attach_post paths */
static void ice_admin_timer(void *arg);
static void ice_transition_recovery_mode(struct ice_softc *sc);
static void ice_transition_safe_mode(struct ice_softc *sc);
static void ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask);
139 
140 /*
141  * Device Interface Declaration
142  */
143 
/**
 * @var ice_methods
 * @brief ice driver method entry points
 *
 * List of device methods implementing the generic device interface used by
 * the device stack to interact with the ice driver. Since this is an iflib
 * driver, most of the methods point to the generic iflib implementation.
 *
 * The only driver-specific entry is device_register, handled by
 * ice_register(), which returns the shared context (ice_sctx).
 */
static device_method_t ice_methods[] = {
	/* Device interface */
	DEVMETHOD(device_register, ice_register),
	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
	DEVMETHOD(device_attach,   iflib_device_attach),
	DEVMETHOD(device_detach,   iflib_device_detach),
	DEVMETHOD(device_shutdown, iflib_device_shutdown),
	DEVMETHOD(device_suspend,  iflib_device_suspend),
	DEVMETHOD(device_resume,   iflib_device_resume),
	DEVMETHOD_END
};
163 
/**
 * @var ice_iflib_methods
 * @brief iflib method entry points
 *
 * List of device methods used by the iflib stack to interact with this
 * driver. These are the real main entry points used to interact with this
 * driver.
 *
 * Each entry binds an ifdi_* method to the static ice_if_* handler of the
 * same name, all of which are declared at the top of this file. The table is
 * referenced by ice_iflib_driver.
 */
static device_method_t ice_iflib_methods[] = {
	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
	DEVMETHOD(ifdi_detach, ice_if_detach),
	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
	DEVMETHOD(ifdi_media_status, ice_if_media_status),
	DEVMETHOD(ifdi_media_change, ice_if_media_change),
	DEVMETHOD(ifdi_init, ice_if_init),
	DEVMETHOD(ifdi_stop, ice_if_stop),
	DEVMETHOD(ifdi_timer, ice_if_timer),
	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
	DEVMETHOD(ifdi_suspend, ice_if_suspend),
	DEVMETHOD(ifdi_resume, ice_if_resume),
	DEVMETHOD(ifdi_needs_restart, ice_if_needs_restart),
	DEVMETHOD_END
};
203 
/**
 * @var ice_driver
 * @brief driver structure for the generic device stack
 *
 * driver_t definition used to setup the generic device methods. It uses the
 * ice_methods table and is the driver passed to DRIVER_MODULE() below. The
 * softc size is that of struct ice_softc.
 */
static driver_t ice_driver = {
	.name = "ice",
	.methods = ice_methods,
	.size = sizeof(struct ice_softc),
};
215 
/**
 * @var ice_iflib_driver
 * @brief driver structure for the iflib stack
 *
 * driver_t definition used to setup the iflib device methods. It uses the
 * ice_iflib_methods table and is handed to iflib through the isc_driver
 * member of ice_sctx. It shares the "ice" name and the struct ice_softc size
 * with ice_driver.
 */
static driver_t ice_iflib_driver = {
	.name = "ice",
	.methods = ice_iflib_methods,
	.size = sizeof(struct ice_softc),
};
227 
/*
 * Tx/Rx operation tables defined outside of this file. ice_setup_scctx()
 * installs ice_recovery_txrx when the driver is in recovery mode and
 * ice_txrx otherwise.
 */
extern struct if_txrx ice_txrx;
extern struct if_txrx ice_recovery_txrx;
230 
/**
 * @var ice_sctx
 * @brief ice driver shared context
 *
 * Structure defining shared values (context) that is used by all instances of
 * the device. Primarily used to setup details about how the iflib stack
 * should treat this driver. Also defines the default, minimum, and maximum
 * number of descriptors in each ring.
 *
 * A pointer to this structure is returned to iflib by ice_register().
 */
static struct if_shared_ctx ice_sctx = {
	.isc_magic = IFLIB_MAGIC,
	.isc_q_align = PAGE_SIZE,

	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
	 * that doesn't make sense since that would be larger than the maximum
	 * size of a single packet.
	 */
	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,

	/* XXX: This is only used by iflib to ensure that
	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
	 */
	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
	/* XXX: This is used by iflib to set the number of segments in the TSO
	 * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the
	 * related ifnet parameter.
	 */
	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,

	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,

	/*
	 * A single entry each. This matches ice_setup_scctx(), which only
	 * fills index 0 of the Tx and Rx queue size arrays.
	 */
	.isc_nfl = 1,
	.isc_ntxqs = 1,
	.isc_nrxqs = 1,

	.isc_admin_intrcnt = 1,
	.isc_vendor_info = ice_vendor_info_array,
	/* ice_driver_version is cast with __DECONST to fit the char * member */
	.isc_driver_version = __DECONST(char *, ice_driver_version),
	.isc_driver = &ice_iflib_driver,

	/*
	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
	 * for hardware checksum offload
	 *
	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
	 * IP sum field, required by our hardware to calculate valid TSO
	 * checksums.
	 *
	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
	 * even when the interface is down.
	 *
	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
	 * vectors manually instead of relying on iflib code to do this.
	 * The driver does so through ice_allocate_msix() during attach_pre.
	 */
	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,

	/* Minimum, maximum, and default descriptor counts (one ring each) */
	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
};
298 
/*
 * Register ice_driver on the pci bus, with ice_module_event_handler as the
 * module event handler and no extra argument.
 */
DRIVER_MODULE(ice, pci, ice_driver, ice_module_event_handler, NULL);

/* Module version, followed by the modules that ice depends on */
MODULE_VERSION(ice, 1);
MODULE_DEPEND(ice, pci, 1, 1, 1);
MODULE_DEPEND(ice, ether, 1, 1, 1);
MODULE_DEPEND(ice, iflib, 1, 1, 1);

/* Publish the supported device table (the same array used by ice_sctx) */
IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);
307 
308 /* Static driver-wide sysctls */
309 #include "ice_iflib_sysctls.h"
310 
311 /**
312  * ice_pci_mapping - Map PCI BAR memory
313  * @sc: device private softc
314  *
315  * Map PCI BAR 0 for device operation.
316  */
317 static int
318 ice_pci_mapping(struct ice_softc *sc)
319 {
320 	int rc;
321 
322 	/* Map BAR0 */
323 	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
324 	if (rc)
325 		return rc;
326 
327 	return 0;
328 }
329 
330 /**
331  * ice_free_pci_mapping - Release PCI BAR memory
332  * @sc: device private softc
333  *
334  * Release PCI BARs which were previously mapped by ice_pci_mapping().
335  */
336 static void
337 ice_free_pci_mapping(struct ice_softc *sc)
338 {
339 	/* Free BAR0 */
340 	ice_free_bar(sc->dev, &sc->bar0);
341 }
342 
343 /*
344  * Device methods
345  */
346 
347 /**
348  * ice_register - register device method callback
349  * @dev: the device being registered
350  *
351  * Returns a pointer to the shared context structure, which is used by iflib.
352  */
353 static void *
354 ice_register(device_t dev __unused)
355 {
356 	return &ice_sctx;
357 } /* ice_register */
358 
359 /**
360  * ice_setup_scctx - Setup the iflib softc context structure
361  * @sc: the device private structure
362  *
363  * Setup the parameters in if_softc_ctx_t structure used by the iflib stack
364  * when loading.
365  */
366 static void
367 ice_setup_scctx(struct ice_softc *sc)
368 {
369 	if_softc_ctx_t scctx = sc->scctx;
370 	struct ice_hw *hw = &sc->hw;
371 	device_t dev = sc->dev;
372 	bool safe_mode, recovery_mode;
373 
374 	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
375 	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);
376 
377 	/*
378 	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
379 	 * a single queue pair.
380 	 */
381 	if (safe_mode || recovery_mode) {
382 		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
383 		scctx->isc_ntxqsets_max = 1;
384 		scctx->isc_nrxqsets_max = 1;
385 	} else {
386 		/*
387 		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
388 		 * the values of the override sysctls. Cache these initial
389 		 * values so that the driver can be aware of what the iflib
390 		 * sysctl value is when setting up MSI-X vectors.
391 		 */
392 		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
393 		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;
394 
395 		if (scctx->isc_ntxqsets == 0)
396 			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
397 		if (scctx->isc_nrxqsets == 0)
398 			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;
399 
400 		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
401 		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;
402 
403 		/*
404 		 * Sanity check that the iflib sysctl values are within the
405 		 * maximum supported range.
406 		 */
407 		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
408 			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
409 		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
410 			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
411 	}
412 
413 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
414 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
415 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
416 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
417 
418 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
419 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
420 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
421 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
422 
423 	scctx->isc_msix_bar = pci_msix_table_bar(dev);
424 	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;
425 
426 	/*
427 	 * If the driver loads in recovery mode, disable Tx/Rx functionality
428 	 */
429 	if (recovery_mode)
430 		scctx->isc_txrx = &ice_recovery_txrx;
431 	else
432 		scctx->isc_txrx = &ice_txrx;
433 
434 	/*
435 	 * If the driver loads in Safe mode or Recovery mode, disable
436 	 * advanced features including hardware offloads.
437 	 */
438 	if (safe_mode || recovery_mode) {
439 		scctx->isc_capenable = ICE_SAFE_CAPS;
440 		scctx->isc_tx_csum_flags = 0;
441 	} else {
442 		scctx->isc_capenable = ICE_FULL_CAPS;
443 		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
444 	}
445 
446 	scctx->isc_capabilities = scctx->isc_capenable;
447 } /* ice_setup_scctx */
448 
449 /**
450  * ice_if_attach_pre - Early device attach logic
451  * @ctx: the iflib context structure
452  *
453  * Called by iflib during the attach process. Earliest main driver entry
454  * point which performs necessary hardware and driver initialization. Called
455  * before the Tx and Rx queues are allocated.
456  */
457 static int
458 ice_if_attach_pre(if_ctx_t ctx)
459 {
460 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
461 	enum ice_fw_modes fw_mode;
462 	int status;
463 	if_softc_ctx_t scctx;
464 	struct ice_hw *hw;
465 	device_t dev;
466 	int err;
467 
468 	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");
469 
470 	ice_set_state(&sc->state, ICE_STATE_ATTACHING);
471 
472 	sc->ctx = ctx;
473 	sc->media = iflib_get_media(ctx);
474 	sc->sctx = iflib_get_sctx(ctx);
475 	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);
476 	sc->ifp = iflib_get_ifp(ctx);
477 
478 	dev = sc->dev = iflib_get_dev(ctx);
479 	scctx = sc->scctx = iflib_get_softc_ctx(ctx);
480 
481 	hw = &sc->hw;
482 	hw->back = sc;
483 
484 	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
485 		 "%s:admin", device_get_nameunit(dev));
486 	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
487 	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);
488 
489 	ASSERT_CTX_LOCKED(sc);
490 
491 	if (ice_pci_mapping(sc)) {
492 		err = (ENXIO);
493 		goto destroy_admin_timer;
494 	}
495 
496 	/* Save off the PCI information */
497 	ice_save_pci_info(hw, dev);
498 
499 	/* create tunables as early as possible */
500 	ice_add_device_tunables(sc);
501 
502 	/* Setup ControlQ lengths */
503 	ice_set_ctrlq_len(hw);
504 
505 reinit_hw:
506 
507 	fw_mode = ice_get_fw_mode(hw);
508 	if (fw_mode == ICE_FW_MODE_REC) {
509 		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
510 
511 		err = ice_attach_pre_recovery_mode(sc);
512 		if (err)
513 			goto free_pci_mapping;
514 
515 		return (0);
516 	}
517 
518 	/* Initialize the hw data structure */
519 	status = ice_init_hw(hw);
520 	if (status) {
521 		if (status == ICE_ERR_FW_API_VER) {
522 			/* Enter recovery mode, so that the driver remains
523 			 * loaded. This way, if the system administrator
524 			 * cannot update the driver, they may still attempt to
525 			 * downgrade the NVM.
526 			 */
527 			err = ice_attach_pre_recovery_mode(sc);
528 			if (err)
529 				goto free_pci_mapping;
530 
531 			return (0);
532 		} else {
533 			err = EIO;
534 			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
535 				      ice_status_str(status),
536 				      ice_aq_str(hw->adminq.sq_last_status));
537 		}
538 		goto free_pci_mapping;
539 	}
540 
541 	ice_init_device_features(sc);
542 
543 	/* Keep flag set by default */
544 	ice_set_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
545 
546 	/* Notify firmware of the device driver version */
547 	err = ice_send_version(sc);
548 	if (err)
549 		goto deinit_hw;
550 
551 	/*
552 	 * Success indicates a change was made that requires a reinitialization
553 	 * of the hardware
554 	 */
555 	err = ice_load_pkg_file(sc);
556 	if (!err) {
557 		ice_deinit_hw(hw);
558 		goto reinit_hw;
559 	}
560 
561 	err = ice_init_link_events(sc);
562 	if (err) {
563 		device_printf(dev, "ice_init_link_events failed: %s\n",
564 			      ice_err_str(err));
565 		goto deinit_hw;
566 	}
567 
568 	/* Initialize VLAN mode in FW; if dual VLAN mode is supported by the package
569 	 * and firmware, this will force them to use single VLAN mode.
570 	 */
571 	status = ice_set_vlan_mode(hw);
572 	if (status) {
573 		err = EIO;
574 		device_printf(dev, "Unable to initialize VLAN mode, err %s aq_err %s\n",
575 			      ice_status_str(status),
576 			      ice_aq_str(hw->adminq.sq_last_status));
577 		goto deinit_hw;
578 	}
579 
580 	ice_print_nvm_version(sc);
581 
582 	/* Setup the MAC address */
583 	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);
584 
585 	/* Setup the iflib softc context structure */
586 	ice_setup_scctx(sc);
587 
588 	/* Initialize the Tx queue manager */
589 	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
590 	if (err) {
591 		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
592 			      ice_err_str(err));
593 		goto deinit_hw;
594 	}
595 
596 	/* Initialize the Rx queue manager */
597 	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
598 	if (err) {
599 		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
600 			      ice_err_str(err));
601 		goto free_tx_qmgr;
602 	}
603 
604 	/* Initialize the PF device interrupt resource manager */
605 	err = ice_alloc_intr_tracking(sc);
606 	if (err)
607 		/* Errors are already printed */
608 		goto free_rx_qmgr;
609 
610 	/* Determine maximum number of VSIs we'll prepare for */
611 	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
612 				    hw->func_caps.guar_num_vsi);
613 
614 	if (!sc->num_available_vsi) {
615 		err = EIO;
616 		device_printf(dev, "No VSIs allocated to host\n");
617 		goto free_intr_tracking;
618 	}
619 
620 	/* Allocate storage for the VSI pointers */
621 	sc->all_vsi = (struct ice_vsi **)
622 		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
623 		       M_ICE, M_WAITOK | M_ZERO);
624 	if (!sc->all_vsi) {
625 		err = ENOMEM;
626 		device_printf(dev, "Unable to allocate VSI array\n");
627 		goto free_intr_tracking;
628 	}
629 
630 	/*
631 	 * Prepare the statically allocated primary PF VSI in the softc
632 	 * structure. Other VSIs will be dynamically allocated as needed.
633 	 */
634 	ice_setup_pf_vsi(sc);
635 
636 	ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
637 	    scctx->isc_nrxqsets_max);
638 
639 	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
640 	err = ice_allocate_msix(sc);
641 	if (err)
642 		goto free_main_vsi;
643 
644 	return 0;
645 
646 free_main_vsi:
647 	/* ice_release_vsi will free the queue maps if they were allocated */
648 	ice_release_vsi(&sc->pf_vsi);
649 	free(sc->all_vsi, M_ICE);
650 	sc->all_vsi = NULL;
651 free_intr_tracking:
652 	ice_free_intr_tracking(sc);
653 free_rx_qmgr:
654 	ice_resmgr_destroy(&sc->rx_qmgr);
655 free_tx_qmgr:
656 	ice_resmgr_destroy(&sc->tx_qmgr);
657 deinit_hw:
658 	ice_deinit_hw(hw);
659 free_pci_mapping:
660 	ice_free_pci_mapping(sc);
661 destroy_admin_timer:
662 	mtx_lock(&sc->admin_mtx);
663 	callout_stop(&sc->admin_timer);
664 	mtx_unlock(&sc->admin_mtx);
665 	mtx_destroy(&sc->admin_mtx);
666 	return err;
667 } /* ice_if_attach_pre */
668 
669 /**
670  * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
671  * @sc: the device private softc
672  *
673  * Loads the device driver in limited Firmware Recovery mode, intended to
674  * allow users to update the firmware to attempt to recover the device.
675  *
676  * @remark We may enter recovery mode in case either (a) the firmware is
677  * detected to be in an invalid state and must be re-programmed, or (b) the
678  * driver detects that the loaded firmware has a non-compatible API version
679  * that the driver cannot operate with.
680  */
681 static int
682 ice_attach_pre_recovery_mode(struct ice_softc *sc)
683 {
684 	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);
685 
686 	/* Setup the iflib softc context */
687 	ice_setup_scctx(sc);
688 
689 	/* Setup the PF VSI back pointer */
690 	sc->pf_vsi.sc = sc;
691 
692 	/*
693 	 * We still need to allocate MSI-X vectors since we need one vector to
694 	 * run the administrative admin interrupt
695 	 */
696 	return ice_allocate_msix(sc);
697 }
698 
699 /**
700  * ice_update_link_status - notify OS of link state change
701  * @sc: device private softc structure
702  * @update_media: true if we should update media even if link didn't change
703  *
704  * Called to notify iflib core of link status changes. Should be called once
705  * during attach_post, and whenever link status changes during runtime.
706  *
707  * This call only updates the currently supported media types if the link
708  * status changed, or if update_media is set to true.
709  */
710 static void
711 ice_update_link_status(struct ice_softc *sc, bool update_media)
712 {
713 	struct ice_hw *hw = &sc->hw;
714 	int status;
715 
716 	/* Never report link up when in recovery mode */
717 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
718 		return;
719 
720 	/* Report link status to iflib only once each time it changes */
721 	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
722 		if (sc->link_up) { /* link is up */
723 			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);
724 
725 			if (!(hw->port_info->phy.link_info_old.link_info & ICE_AQ_LINK_UP))
726 				ice_set_default_local_lldp_mib(sc);
727 
728 			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
729 			ice_rdma_link_change(sc, LINK_STATE_UP, baudrate);
730 
731 			ice_link_up_msg(sc);
732 		} else { /* link is down */
733 			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
734 			ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
735 		}
736 		update_media = true;
737 	}
738 
739 	/* Update the supported media types */
740 	if (update_media && !ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
741 		status = ice_add_media_types(sc, sc->media);
742 		if (status)
743 			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
744 				      ice_status_str(status),
745 				      ice_aq_str(hw->adminq.sq_last_status));
746 	}
747 }
748 
749 /**
750  * ice_if_attach_post - Late device attach logic
751  * @ctx: the iflib context structure
752  *
753  * Called by iflib to finish up attaching the device. Performs any attach
754  * logic which must wait until after the Tx and Rx queues have been
755  * allocated.
756  */
757 static int
758 ice_if_attach_post(if_ctx_t ctx)
759 {
760 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
761 	if_t ifp = iflib_get_ifp(ctx);
762 	int status;
763 	int err;
764 
765 	ASSERT_CTX_LOCKED(sc);
766 
767 	/* We don't yet support loading if MSI-X is not supported */
768 	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
769 		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
770 		return (ENOTSUP);
771 	}
772 
773 	/* The ifnet structure hasn't yet been initialized when the attach_pre
774 	 * handler is called, so wait until attach_post to setup the
775 	 * isc_max_frame_size.
776 	 */
777 	sc->scctx->isc_max_frame_size = if_getmtu(ifp) +
778 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
779 
780 	/*
781 	 * If we are in recovery mode, only perform a limited subset of
782 	 * initialization to support NVM recovery.
783 	 */
784 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
785 		ice_attach_post_recovery_mode(sc);
786 		return (0);
787 	}
788 
789 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
790 
791 	err = ice_initialize_vsi(&sc->pf_vsi);
792 	if (err) {
793 		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
794 			      ice_err_str(err));
795 		return err;
796 	}
797 
798 	/* Enable FW health event reporting */
799 	ice_init_health_events(sc);
800 
801 	/* Configure the main PF VSI for RSS */
802 	err = ice_config_rss(&sc->pf_vsi);
803 	if (err) {
804 		device_printf(sc->dev,
805 			      "Unable to configure RSS for the main VSI, err %s\n",
806 			      ice_err_str(err));
807 		return err;
808 	}
809 
810 	/* Configure switch to drop transmitted LLDP and PAUSE frames */
811 	err = ice_cfg_pf_ethertype_filters(sc);
812 	if (err)
813 		return err;
814 
815 	ice_get_and_print_bus_info(sc);
816 
817 	ice_set_link_management_mode(sc);
818 
819 	ice_init_saved_phy_cfg(sc);
820 
821 	ice_cfg_pba_num(sc);
822 
823 	/* Set a default value for PFC mode on attach since the FW state is unknown
824 	 * before sysctl tunables are executed and it can't be queried. This fixes an
825 	 * issue when loading the driver with the FW LLDP agent enabled but the FW
826 	 * was previously in DSCP PFC mode.
827 	 */
828 	status = ice_aq_set_pfc_mode(&sc->hw, ICE_AQC_PFC_VLAN_BASED_PFC, NULL);
829 	if (status)
830 		device_printf(sc->dev, "Setting pfc mode failed, status %s\n", ice_status_str(status));
831 
832 	ice_add_device_sysctls(sc);
833 
834 	/* Get DCBX/LLDP state and start DCBX agent */
835 	ice_init_dcb_setup(sc);
836 
837 	/* Setup link, if PHY FW is ready */
838 	ice_init_link(sc);
839 
840 	/* Configure interrupt causes for the administrative interrupt */
841 	ice_configure_misc_interrupts(sc);
842 
843 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
844 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
845 
846 	err = ice_rdma_pf_attach(sc);
847 	if (err)
848 		return (err);
849 
850 	/* Start the admin timer */
851 	mtx_lock(&sc->admin_mtx);
852 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
853 	mtx_unlock(&sc->admin_mtx);
854 
855 	if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
856 		 !ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
857 		ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
858 
859 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
860 
861 	return 0;
862 } /* ice_if_attach_post */
863 
864 /**
865  * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
866  * @sc: the device private softc
867  *
868  * Performs minimal work to prepare the driver to recover an NVM in case the
869  * firmware is in recovery mode.
870  */
871 static void
872 ice_attach_post_recovery_mode(struct ice_softc *sc)
873 {
874 	/* Configure interrupt causes for the administrative interrupt */
875 	ice_configure_misc_interrupts(sc);
876 
877 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
878 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
879 
880 	/* Start the admin timer */
881 	mtx_lock(&sc->admin_mtx);
882 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
883 	mtx_unlock(&sc->admin_mtx);
884 
885 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
886 }
887 
888 /**
889  * ice_free_irqvs - Free IRQ vector memory
890  * @sc: the device private softc structure
891  *
892  * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
893  */
894 static void
895 ice_free_irqvs(struct ice_softc *sc)
896 {
897 	struct ice_vsi *vsi = &sc->pf_vsi;
898 	if_ctx_t ctx = sc->ctx;
899 	int i;
900 
901 	/* If the irqvs array is NULL, then there are no vectors to free */
902 	if (sc->irqvs == NULL)
903 		return;
904 
905 	/* Free the IRQ vectors */
906 	for (i = 0; i < sc->num_irq_vectors; i++)
907 		iflib_irq_free(ctx, &sc->irqvs[i].irq);
908 
909 	/* Clear the irqv pointers */
910 	for (i = 0; i < vsi->num_rx_queues; i++)
911 		vsi->rx_queues[i].irqv = NULL;
912 
913 	for (i = 0; i < vsi->num_tx_queues; i++)
914 		vsi->tx_queues[i].irqv = NULL;
915 
916 	/* Release the vector array memory */
917 	free(sc->irqvs, M_ICE);
918 	sc->irqvs = NULL;
919 	sc->num_irq_vectors = 0;
920 }
921 
922 /**
923  * ice_if_detach - Device driver detach logic
924  * @ctx: iflib context structure
925  *
926  * Perform device shutdown logic to detach the device driver.
927  *
928  * Note that there is no guarantee of the ordering of ice_if_queues_free() and
929  * ice_if_detach(). It is possible for the functions to be called in either
930  * order, and they must not assume to have a strict ordering.
931  */
932 static int
933 ice_if_detach(if_ctx_t ctx)
934 {
935 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
936 	struct ice_vsi *vsi = &sc->pf_vsi;
937 	int status;
938 	int i;
939 
940 	ASSERT_CTX_LOCKED(sc);
941 
942 	/* Indicate that we're detaching */
943 	ice_set_state(&sc->state, ICE_STATE_DETACHING);
944 
945 	/* Stop the admin timer */
946 	mtx_lock(&sc->admin_mtx);
947 	callout_stop(&sc->admin_timer);
948 	mtx_unlock(&sc->admin_mtx);
949 	mtx_destroy(&sc->admin_mtx);
950 
951 	/* Remove additional interfaces if they exist */
952 	if (sc->mirr_if)
953 		ice_destroy_mirror_interface(sc);
954 	ice_rdma_pf_detach(sc);
955 
956 	/* Free allocated media types */
957 	ifmedia_removeall(sc->media);
958 
959 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
960 	 * pointers. Note, the calls here and those in ice_if_queues_free()
961 	 * are *BOTH* necessary, as we cannot guarantee which path will be
962 	 * run first
963 	 */
964 	ice_vsi_del_txqs_ctx(vsi);
965 	ice_vsi_del_rxqs_ctx(vsi);
966 
967 	/* Release MSI-X resources */
968 	ice_free_irqvs(sc);
969 
970 	for (i = 0; i < sc->num_available_vsi; i++) {
971 		if (sc->all_vsi[i])
972 			ice_release_vsi(sc->all_vsi[i]);
973 	}
974 
975 	if (sc->all_vsi) {
976 		free(sc->all_vsi, M_ICE);
977 		sc->all_vsi = NULL;
978 	}
979 
980 	/* Release MSI-X memory */
981 	pci_release_msi(sc->dev);
982 
983 	if (sc->msix_table != NULL) {
984 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
985 				     rman_get_rid(sc->msix_table),
986 				     sc->msix_table);
987 		sc->msix_table = NULL;
988 	}
989 
990 	ice_free_intr_tracking(sc);
991 
992 	/* Destroy the queue managers */
993 	ice_resmgr_destroy(&sc->tx_qmgr);
994 	ice_resmgr_destroy(&sc->rx_qmgr);
995 
996 	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
997 		ice_deinit_hw(&sc->hw);
998 
999 	IFLIB_CTX_UNLOCK(sc);
1000 	status = ice_reset(&sc->hw, ICE_RESET_PFR);
1001 	IFLIB_CTX_LOCK(sc);
1002 	if (status) {
1003 		device_printf(sc->dev, "device PF reset failed, err %s\n",
1004 			      ice_status_str(status));
1005 	}
1006 
1007 	ice_free_pci_mapping(sc);
1008 
1009 	return 0;
1010 } /* ice_if_detach */
1011 
1012 /**
1013  * ice_if_tx_queues_alloc - Allocate Tx queue memory
1014  * @ctx: iflib context structure
1015  * @vaddrs: virtual addresses for the queue memory
1016  * @paddrs: physical addresses for the queue memory
1017  * @ntxqs: the number of Tx queues per set (should always be 1)
1018  * @ntxqsets: the number of Tx queue sets to allocate
1019  *
1020  * Called by iflib to allocate Tx queues for the device. Allocates driver
1021  * memory to track each queue, the status arrays used for descriptor
1022  * status reporting, and Tx queue sysctls.
1023  */
1024 static int
1025 ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1026 		       int __invariant_only ntxqs, int ntxqsets)
1027 {
1028 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1029 	struct ice_vsi *vsi = &sc->pf_vsi;
1030 	struct ice_tx_queue *txq;
1031 	int err, i, j;
1032 
1033 	MPASS(ntxqs == 1);
1034 	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
1035 	ASSERT_CTX_LOCKED(sc);
1036 
1037 	/* Do not bother allocating queues if we're in recovery mode */
1038 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1039 		return (0);
1040 
1041 	/* Allocate queue structure memory */
1042 	if (!(vsi->tx_queues =
1043 	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1044 		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
1045 		return (ENOMEM);
1046 	}
1047 
1048 	/* Allocate report status arrays */
1049 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1050 		if (!(txq->tx_rsq =
1051 		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
1052 			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
1053 			err = ENOMEM;
1054 			goto free_tx_queues;
1055 		}
1056 		/* Initialize report status array */
1057 		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
1058 			txq->tx_rsq[j] = QIDX_INVALID;
1059 	}
1060 
1061 	/* Assign queues from PF space to the main VSI */
1062 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
1063 	if (err) {
1064 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1065 			      ice_err_str(err));
1066 		goto free_tx_queues;
1067 	}
1068 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1069 
1070 	/* Add Tx queue sysctls context */
1071 	ice_vsi_add_txqs_ctx(vsi);
1072 
1073 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1074 		/* q_handle == me when only one TC */
1075 		txq->me = txq->q_handle = i;
1076 		txq->vsi = vsi;
1077 
1078 		/* store the queue size for easier access */
1079 		txq->desc_count = sc->scctx->isc_ntxd[0];
1080 
1081 		/* get the virtual and physical address of the hardware queues */
1082 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
1083 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
1084 		txq->tx_paddr = paddrs[i];
1085 
1086 		ice_add_txq_sysctls(txq);
1087 	}
1088 
1089 	vsi->num_tx_queues = ntxqsets;
1090 
1091 	return (0);
1092 
1093 free_tx_queues:
1094 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1095 		if (txq->tx_rsq != NULL) {
1096 			free(txq->tx_rsq, M_ICE);
1097 			txq->tx_rsq = NULL;
1098 		}
1099 	}
1100 	free(vsi->tx_queues, M_ICE);
1101 	vsi->tx_queues = NULL;
1102 	return err;
1103 }
1104 
1105 /**
1106  * ice_if_rx_queues_alloc - Allocate Rx queue memory
1107  * @ctx: iflib context structure
1108  * @vaddrs: virtual addresses for the queue memory
1109  * @paddrs: physical addresses for the queue memory
1110  * @nrxqs: number of Rx queues per set (should always be 1)
1111  * @nrxqsets: number of Rx queue sets to allocate
1112  *
1113  * Called by iflib to allocate Rx queues for the device. Allocates driver
1114  * memory to track each queue, as well as sets up the Rx queue sysctls.
1115  */
1116 static int
1117 ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1118 		       int __invariant_only nrxqs, int nrxqsets)
1119 {
1120 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1121 	struct ice_vsi *vsi = &sc->pf_vsi;
1122 	struct ice_rx_queue *rxq;
1123 	int err, i;
1124 
1125 	MPASS(nrxqs == 1);
1126 	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
1127 	ASSERT_CTX_LOCKED(sc);
1128 
1129 	/* Do not bother allocating queues if we're in recovery mode */
1130 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1131 		return (0);
1132 
1133 	/* Allocate queue structure memory */
1134 	if (!(vsi->rx_queues =
1135 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1136 		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
1137 		return (ENOMEM);
1138 	}
1139 
1140 	/* Assign queues from PF space to the main VSI */
1141 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
1142 	if (err) {
1143 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1144 			      ice_err_str(err));
1145 		goto free_rx_queues;
1146 	}
1147 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1148 
1149 	/* Add Rx queue sysctls context */
1150 	ice_vsi_add_rxqs_ctx(vsi);
1151 
1152 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
1153 		rxq->me = i;
1154 		rxq->vsi = vsi;
1155 
1156 		/* store the queue size for easier access */
1157 		rxq->desc_count = sc->scctx->isc_nrxd[0];
1158 
1159 		/* get the virtual and physical address of the hardware queues */
1160 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
1161 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
1162 		rxq->rx_paddr = paddrs[i];
1163 
1164 		ice_add_rxq_sysctls(rxq);
1165 	}
1166 
1167 	vsi->num_rx_queues = nrxqsets;
1168 
1169 	return (0);
1170 
1171 free_rx_queues:
1172 	free(vsi->rx_queues, M_ICE);
1173 	vsi->rx_queues = NULL;
1174 	return err;
1175 }
1176 
1177 /**
1178  * ice_if_queues_free - Free queue memory
1179  * @ctx: the iflib context structure
1180  *
1181  * Free queue memory allocated by ice_if_tx_queues_alloc() and
1182  * ice_if_rx_queues_alloc().
1183  *
1184  * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
1185  * called in the same order. It's possible for ice_if_queues_free() to be
1186  * called prior to ice_if_detach(), and vice versa.
1187  *
1188  * For this reason, the main VSI is a static member of the ice_softc, which is
1189  * not free'd until after iflib finishes calling both of these functions.
1190  *
1191  * Thus, care must be taken in how we manage the memory being freed by this
1192  * function, and in what tasks it can and must perform.
1193  */
1194 static void
1195 ice_if_queues_free(if_ctx_t ctx)
1196 {
1197 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1198 	struct ice_vsi *vsi = &sc->pf_vsi;
1199 	struct ice_tx_queue *txq;
1200 	int i;
1201 
1202 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
1203 	 * pointers. Note, the calls here and those in ice_if_detach()
1204 	 * are *BOTH* necessary, as we cannot guarantee which path will be
1205 	 * run first
1206 	 */
1207 	ice_vsi_del_txqs_ctx(vsi);
1208 	ice_vsi_del_rxqs_ctx(vsi);
1209 
1210 	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
1211 	ice_free_irqvs(sc);
1212 
1213 	if (vsi->tx_queues != NULL) {
1214 		/* free the tx_rsq arrays */
1215 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1216 			if (txq->tx_rsq != NULL) {
1217 				free(txq->tx_rsq, M_ICE);
1218 				txq->tx_rsq = NULL;
1219 			}
1220 		}
1221 		free(vsi->tx_queues, M_ICE);
1222 		vsi->tx_queues = NULL;
1223 		vsi->num_tx_queues = 0;
1224 	}
1225 	if (vsi->rx_queues != NULL) {
1226 		free(vsi->rx_queues, M_ICE);
1227 		vsi->rx_queues = NULL;
1228 		vsi->num_rx_queues = 0;
1229 	}
1230 }
1231 
1232 /**
1233  * ice_msix_que - Fast interrupt handler for MSI-X receive queues
1234  * @arg: The Rx queue memory
1235  *
1236  * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
1237  * an MSI-X interrupt for a given queue is triggered. Currently this just asks
1238  * iflib to schedule the main Rx thread.
1239  */
1240 static int
1241 ice_msix_que(void *arg)
1242 {
1243 	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;
1244 
1245 	/* TODO: dynamic ITR algorithm?? */
1246 
1247 	return (FILTER_SCHEDULE_THREAD);
1248 }
1249 
1250 /**
1251  * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
1252  * @arg: pointer to device softc memory
1253  *
1254  * Called by iflib when an administrative interrupt occurs. Should perform any
1255  * fast logic for handling the interrupt cause, and then indicate whether the
1256  * admin task needs to be queued.
1257  */
1258 static int
1259 ice_msix_admin(void *arg)
1260 {
1261 	struct ice_softc *sc = (struct ice_softc *)arg;
1262 	struct ice_hw *hw = &sc->hw;
1263 	device_t dev = sc->dev;
1264 	u32 oicr;
1265 
1266 	/* There is no safe way to modify the enabled miscellaneous causes of
1267 	 * the OICR vector at runtime, as doing so would be prone to race
1268 	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
1269 	 * causes and allow future interrupts to occur. The admin interrupt
1270 	 * vector will not be re-enabled until after we exit this function,
1271 	 * but any delayed tasks must be resilient against possible "late
1272 	 * arrival" interrupts that occur while we're already handling the
1273 	 * task. This is done by using state bits and serializing these
1274 	 * delayed tasks via the admin status task function.
1275 	 */
1276 	oicr = rd32(hw, PFINT_OICR);
1277 
1278 	/* Processing multiple controlq interrupts on a single vector does not
1279 	 * provide an indication of which controlq triggered the interrupt.
1280 	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
1281 	 * registers. However, the INTEVENT bit is not guaranteed to be set as
1282 	 * it gets automatically cleared when the hardware acknowledges the
1283 	 * interrupt.
1284 	 *
1285 	 * This means we don't really have a good indication of whether or
1286 	 * which controlq triggered this interrupt. We'll just notify the
1287 	 * admin task that it should check all the controlqs.
1288 	 */
1289 	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
1290 
1291 	if (oicr & PFINT_OICR_VFLR_M) {
1292 		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
1293 	}
1294 
1295 	if (oicr & PFINT_OICR_MAL_DETECT_M) {
1296 		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
1297 	}
1298 
1299 	if (oicr & PFINT_OICR_GRST_M) {
1300 		u32 reset;
1301 
1302 		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
1303 			GLGEN_RSTAT_RESET_TYPE_S;
1304 
1305 		if (reset == ICE_RESET_CORER)
1306 			sc->soft_stats.corer_count++;
1307 		else if (reset == ICE_RESET_GLOBR)
1308 			sc->soft_stats.globr_count++;
1309 		else
1310 			sc->soft_stats.empr_count++;
1311 
1312 		/* There are a couple of bits at play for handling resets.
1313 		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
1314 		 * indicate that the driver has received an OICR with a reset
1315 		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
1316 		 * happen. Second, we set hw->reset_ongoing to indicate that
1317 		 * the hardware is in reset. We will set this back to false as
1318 		 * soon as the driver has determined that the hardware is out
1319 		 * of reset.
1320 		 *
1321 		 * If the driver wishes to trigger a request, it can set one of
1322 		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
1323 		 * correct type of reset.
1324 		 */
1325 		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) {
1326 			hw->reset_ongoing = true;
1327 			/*
1328 			 * During the NVM update process, there is a driver reset and link
1329 			 * goes down and then up. The below if-statement prevents a second
1330 			 * link flap from occurring in ice_if_init().
1331 			 */
1332 			if (if_getflags(sc->ifp) & IFF_UP)
1333 				ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
1334 		}
1335 	}
1336 
1337 	if (oicr & PFINT_OICR_ECC_ERR_M) {
1338 		device_printf(dev, "ECC Error detected!\n");
1339 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1340 	}
1341 
1342 	if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M)) {
1343 		if (oicr & PFINT_OICR_HMC_ERR_M)
1344 			/* Log the HMC errors */
1345 			ice_log_hmc_error(hw, dev);
1346 		ice_rdma_notify_pe_intr(sc, oicr);
1347 	}
1348 
1349 	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
1350 		device_printf(dev, "PCI Exception detected!\n");
1351 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1352 	}
1353 
1354 	return (FILTER_SCHEDULE_THREAD);
1355 }
1356 
1357 /**
1358  * ice_allocate_msix - Allocate MSI-X vectors for the interface
1359  * @sc: the device private softc
1360  *
1361  * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
1362  *
1363  * First, determine a suitable total number of vectors based on the number
1364  * of CPUs, RSS buckets, the administrative vector, and other demands such as
1365  * RDMA.
1366  *
1367  * Request the desired amount of vectors, and see how many we obtain. If we
1368  * don't obtain as many as desired, reduce the demands by lowering the number
1369  * of requested queues or reducing the demand from other features such as
1370  * RDMA.
1371  *
1372  * @remark This function is required because the driver sets the
1373  * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
1374  * manually.
1375  *
1376  * @remark This driver will only use MSI-X vectors. If this is not possible,
1377  * neither MSI or legacy interrupts will be tried.
1378  *
1379  * @remark if it exists, os_imgr is initialized here for keeping track of
1380  * the assignments of extra MSIX vectors.
1381  *
1382  * @post on success this function must set the following scctx parameters:
1383  * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
1384  *
1385  * @returns zero on success or an error code on failure.
1386  */
1387 static int
1388 ice_allocate_msix(struct ice_softc *sc)
1389 {
1390 	bool iflib_override_queue_count = false;
1391 	if_softc_ctx_t scctx = sc->scctx;
1392 	device_t dev = sc->dev;
1393 	cpuset_t cpus;
1394 	int bar, queues, vectors, requested;
1395 	int err = 0;
1396 	int rdma;
1397 
1398 	/* Allocate the MSI-X bar */
1399 	bar = scctx->isc_msix_bar;
1400 	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
1401 	if (!sc->msix_table) {
1402 		device_printf(dev, "Unable to map MSI-X table\n");
1403 		return (ENOMEM);
1404 	}
1405 
1406 	/* Check if the iflib queue count sysctls have been set */
1407 	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
1408 		iflib_override_queue_count = true;
1409 
1410 	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
1411 	if (err) {
1412 		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
1413 			      __func__, ice_err_str(err));
1414 		CPU_COPY(&all_cpus, &cpus);
1415 	}
1416 
1417 	/* Attempt to mimic behavior of iflib_msix_init */
1418 	if (iflib_override_queue_count) {
1419 		/*
1420 		 * If the override sysctls have been set, limit the queues to
1421 		 * the number of logical CPUs.
1422 		 */
1423 		queues = mp_ncpus;
1424 	} else {
1425 		/*
1426 		 * Otherwise, limit the queue count to the CPUs associated
1427 		 * with the NUMA node the device is associated with.
1428 		 */
1429 		queues = CPU_COUNT(&cpus);
1430 	}
1431 
1432 	/* Clamp to the number of RSS buckets */
1433 	queues = imin(queues, rss_getnumbuckets());
1434 
1435 	/*
1436 	 * Clamp the number of queue pairs to the minimum of the requested Tx
1437 	 * and Rx queues.
1438 	 */
1439 	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
1440 	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
1441 
1442 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) {
1443 		/*
1444 		 * Choose a number of RDMA vectors based on the number of CPUs
1445 		 * up to a maximum
1446 		 */
1447 		rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX);
1448 
1449 		/* Further limit by the user configurable tunable */
1450 		rdma = min(rdma, ice_rdma_max_msix);
1451 	} else {
1452 		rdma = 0;
1453 	}
1454 
1455 	/*
1456 	 * Determine the number of vectors to request. Note that we also need
1457 	 * to allocate one vector for administrative tasks.
1458 	 */
1459 	requested = rdma + queues + 1;
1460 	/* Add extra vectors requested by the user for later subinterface
1461 	 * creation.
1462 	 */
1463 	if_ctx_t ctx = sc->ctx;
1464 	u32 extra_vectors = iflib_get_extra_msix_vectors_sysctl(ctx);
1465 	requested += extra_vectors;
1466 
1467 	vectors = requested;
1468 	err = pci_alloc_msix(dev, &vectors);
1469 	if (err) {
1470 		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
1471 			      vectors, ice_err_str(err));
1472 		goto err_free_msix_table;
1473 	}
1474 
1475 	/* If we don't receive enough vectors, reduce demands */
1476 	if (vectors < requested) {
1477 		int diff = requested - vectors;
1478 
1479 		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
1480 			      requested, vectors);
1481 
1482 		diff += extra_vectors;
1483 		extra_vectors = 0;
1484 		/*
1485 		 * The OS didn't grant us the requested number of vectors.
1486 		 * Check to see if we can reduce demands by limiting the
1487 		 * number of vectors allocated to certain features.
1488 		 */
1489 
1490 		if (rdma >= diff) {
1491 			/* Reduce the number of RDMA vectors we reserve */
1492 			rdma -= diff;
1493 			diff = 0;
1494 		} else {
1495 			/* Disable RDMA and reduce the difference */
1496 			ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
1497 			diff -= rdma;
1498 			rdma = 0;
1499 		}
1500 
1501 		/*
1502 		 * If we still have a difference, we need to reduce the number
1503 		 * of queue pairs.
1504 		 *
1505 		 * However, we still need at least one vector for the admin
1506 		 * interrupt and one queue pair.
1507 		 */
1508 		if (queues <= diff) {
1509 			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
1510 			err = (ERANGE);
1511 			goto err_pci_release_msi;
1512 		}
1513 
1514 		queues -= diff;
1515 	}
1516 
1517 	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
1518 	if (rdma)
1519 		device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n",
1520 			      rdma);
1521 	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
1522 		      vectors);
1523 
1524 	/* Split resulting vectors back into requested splits */
1525 	scctx->isc_vectors = vectors;
1526 	scctx->isc_nrxqsets = queues;
1527 	scctx->isc_ntxqsets = queues;
1528 	scctx->isc_intr = IFLIB_INTR_MSIX;
1529 
1530 	sc->irdma_vectors = rdma;
1531 
1532 	/* Interrupt allocation tracking isn't required in recovery mode,
1533 	 * since neither RDMA nor VFs are enabled.
1534 	 */
1535 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1536 		return (0);
1537 
1538 	/* Keep track of which interrupt indices are being used for what */
1539 	sc->lan_vectors = vectors - rdma;
1540 	sc->lan_vectors -= extra_vectors;
1541 	err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->pf_imap, sc->lan_vectors);
1542 	if (err) {
1543 		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
1544 			      ice_err_str(err));
1545 		goto err_pci_release_msi;
1546 	}
1547 	err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->rdma_imap, rdma);
1548 	if (err) {
1549 		device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n",
1550 			      ice_err_str(err));
1551 		goto err_release_pf_imap;
1552 	}
1553 	sc->extra_vectors = extra_vectors;
1554 	/* Setup another resource manager to track the assignments of extra OS
1555 	 * vectors. These OS interrupt allocations don't need to be contiguous,
1556 	 * unlike the ones that come from the device.
1557 	 */
1558 	err = ice_resmgr_init(&sc->os_imgr, sc->extra_vectors);
1559 	if (err) {
1560 		device_printf(dev, "Unable to initialize OS extra interrupt manager: %s\n",
1561 			      ice_err_str(err));
1562 		ice_resmgr_release_map(&sc->dev_imgr, sc->rdma_imap,
1563 					    rdma);
1564 		goto err_release_pf_imap;
1565 	}
1566 	return (0);
1567 
1568 err_release_pf_imap:
1569 	ice_resmgr_release_map(&sc->dev_imgr, sc->pf_imap,
1570 				    sc->lan_vectors);
1571 err_pci_release_msi:
1572 	pci_release_msi(dev);
1573 err_free_msix_table:
1574 	if (sc->msix_table != NULL) {
1575 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
1576 				rman_get_rid(sc->msix_table),
1577 				sc->msix_table);
1578 		sc->msix_table = NULL;
1579 	}
1580 
1581 	return (err);
1582 }
1583 
1584 /**
1585  * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
1586  * @ctx: the iflib context structure
1587  * @msix: the number of vectors we were assigned
1588  *
1589  * Called by iflib to assign MSI-X vectors to queues. Currently requires that
1590  * we get at least the same number of vectors as we have queues, and that we
1591  * always have the same number of Tx and Rx queues.
1592  *
1593  * Tx queues use a softirq instead of using their own hardware interrupt.
1594  */
1595 static int
1596 ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
1597 {
1598 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1599 	struct ice_vsi *vsi = &sc->pf_vsi;
1600 	int err, i, vector;
1601 
1602 	ASSERT_CTX_LOCKED(sc);
1603 
1604 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
1605 		device_printf(sc->dev,
1606 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
1607 			      vsi->num_tx_queues, vsi->num_rx_queues);
1608 		return (EOPNOTSUPP);
1609 	}
1610 
1611 	if (msix < (vsi->num_rx_queues + 1)) {
1612 		device_printf(sc->dev,
1613 			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
1614 		return (EOPNOTSUPP);
1615 	}
1616 
1617 	/* Save the number of vectors for future use */
1618 	sc->num_irq_vectors = vsi->num_rx_queues + 1;
1619 
1620 	/* Allocate space to store the IRQ vector data */
1621 	if (!(sc->irqvs =
1622 	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
1623 					       M_ICE, M_NOWAIT))) {
1624 		device_printf(sc->dev,
1625 			      "Unable to allocate irqv memory\n");
1626 		return (ENOMEM);
1627 	}
1628 
1629 	/* Administrative interrupt events will use vector 0 */
1630 	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
1631 				      ice_msix_admin, sc, 0, "admin");
1632 	if (err) {
1633 		device_printf(sc->dev,
1634 			      "Failed to register Admin queue handler: %s\n",
1635 			      ice_err_str(err));
1636 		goto free_irqvs;
1637 	}
1638 	sc->irqvs[0].me = 0;
1639 
1640 	/* Do not allocate queue interrupts when in recovery mode */
1641 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1642 		return (0);
1643 
1644 	int rid;
1645 	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
1646 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1647 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1648 		char irq_name[16];
1649 
1650 		rid = vector + 1;
1651 
1652 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
1653 		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
1654 					      IFLIB_INTR_RXTX, ice_msix_que,
1655 					      rxq, rxq->me, irq_name);
1656 		if (err) {
1657 			device_printf(sc->dev,
1658 				      "Failed to allocate q int %d err: %s\n",
1659 				      i, ice_err_str(err));
1660 			vector--;
1661 			i--;
1662 			goto fail;
1663 		}
1664 		sc->irqvs[vector].me = vector;
1665 		rxq->irqv = &sc->irqvs[vector];
1666 
1667 		bzero(irq_name, sizeof(irq_name));
1668 
1669 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
1670 		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
1671 					    IFLIB_INTR_TX, txq,
1672 					    txq->me, irq_name);
1673 		txq->irqv = &sc->irqvs[vector];
1674 	}
1675 
1676 	/* For future interrupt assignments */
1677 	sc->last_rid = rid + sc->irdma_vectors;
1678 
1679 	return (0);
1680 fail:
1681 	for (; i >= 0; i--, vector--)
1682 		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
1683 	iflib_irq_free(ctx, &sc->irqvs[0].irq);
1684 free_irqvs:
1685 	free(sc->irqvs, M_ICE);
1686 	sc->irqvs = NULL;
1687 	return err;
1688 }
1689 
1690 /**
1691  * ice_if_mtu_set - Set the device MTU
1692  * @ctx: iflib context structure
1693  * @mtu: the MTU requested
1694  *
1695  * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
1696  *
1697  * @pre assumes the caller holds the iflib CTX lock
1698  */
1699 static int
1700 ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
1701 {
1702 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1703 
1704 	ASSERT_CTX_LOCKED(sc);
1705 
1706 	/* Do not support configuration when in recovery mode */
1707 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1708 		return (ENOSYS);
1709 
1710 	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
1711 		return (EINVAL);
1712 
1713 	sc->scctx->isc_max_frame_size = mtu +
1714 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1715 
1716 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
1717 
1718 	return (0);
1719 }
1720 
1721 /**
1722  * ice_if_intr_enable - Enable device interrupts
1723  * @ctx: iflib context structure
1724  *
1725  * Called by iflib to request enabling device interrupts.
1726  */
1727 static void
1728 ice_if_intr_enable(if_ctx_t ctx)
1729 {
1730 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1731 	struct ice_vsi *vsi = &sc->pf_vsi;
1732 	struct ice_hw *hw = &sc->hw;
1733 
1734 	ASSERT_CTX_LOCKED(sc);
1735 
1736 	/* Enable ITR 0 */
1737 	ice_enable_intr(hw, sc->irqvs[0].me);
1738 
1739 	/* Do not enable queue interrupts in recovery mode */
1740 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1741 		return;
1742 
1743 	/* Enable all queue interrupts */
1744 	for (int i = 0; i < vsi->num_rx_queues; i++)
1745 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
1746 }
1747 
1748 /**
1749  * ice_if_intr_disable - Disable device interrupts
1750  * @ctx: iflib context structure
1751  *
1752  * Called by iflib to request disabling device interrupts.
1753  */
1754 static void
1755 ice_if_intr_disable(if_ctx_t ctx)
1756 {
1757 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1758 	struct ice_hw *hw = &sc->hw;
1759 	unsigned int i;
1760 
1761 	ASSERT_CTX_LOCKED(sc);
1762 
1763 	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
1764 	 * assigned to queues. Instead of assuming that the interrupt
1765 	 * assignment in the rx_queues structure is valid, just disable all
1766 	 * possible interrupts
1767 	 *
1768 	 * Note that we choose not to disable ITR 0 because this handles the
1769 	 * AdminQ interrupts, and we want to keep processing these even when
1770 	 * the interface is offline.
1771 	 */
1772 	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1773 		ice_disable_intr(hw, i);
1774 }
1775 
1776 /**
1777  * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1778  * @ctx: iflib context structure
1779  * @rxqid: the Rx queue to enable
1780  *
1781  * Enable a specific Rx queue interrupt.
1782  *
1783  * This function is not protected by the iflib CTX lock.
1784  */
1785 static int
1786 ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1787 {
1788 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1789 	struct ice_vsi *vsi = &sc->pf_vsi;
1790 	struct ice_hw *hw = &sc->hw;
1791 
1792 	/* Do not enable queue interrupts in recovery mode */
1793 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1794 		return (ENOSYS);
1795 
1796 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1797 	return (0);
1798 }
1799 
1800 /**
1801  * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1802  * @ctx: iflib context structure
1803  * @txqid: the Tx queue to enable
1804  *
1805  * Enable a specific Tx queue interrupt.
1806  *
1807  * This function is not protected by the iflib CTX lock.
1808  */
1809 static int
1810 ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1811 {
1812 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1813 	struct ice_vsi *vsi = &sc->pf_vsi;
1814 	struct ice_hw *hw = &sc->hw;
1815 
1816 	/* Do not enable queue interrupts in recovery mode */
1817 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1818 		return (ENOSYS);
1819 
1820 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1821 	return (0);
1822 }
1823 
1824 /**
1825  * ice_set_default_promisc_mask - Set default config for promisc settings
1826  * @promisc_mask: bitmask to setup
1827  *
1828  * The ice_(set|clear)_vsi_promisc() function expects a mask of promiscuous
1829  * modes to operate on. The mask used in here is the default one for the
1830  * driver, where promiscuous is enabled/disabled for all types of
1831  * non-VLAN-tagged/VLAN 0 traffic.
1832  */
1833 static void
1834 ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask)
1835 {
1836 	ice_zero_bitmap(promisc_mask, ICE_PROMISC_MAX);
1837 	ice_set_bit(ICE_PROMISC_UCAST_TX, promisc_mask);
1838 	ice_set_bit(ICE_PROMISC_UCAST_RX, promisc_mask);
1839 	ice_set_bit(ICE_PROMISC_MCAST_TX, promisc_mask);
1840 	ice_set_bit(ICE_PROMISC_MCAST_RX, promisc_mask);
1841 }
1842 
1843 /**
1844  * ice_if_promisc_set - Set device promiscuous mode
1845  * @ctx: iflib context structure
1846  * @flags: promiscuous flags to configure
1847  *
1848  * Called by iflib to configure device promiscuous mode.
1849  *
1850  * @remark Calls to this function will always overwrite the previous setting
1851  */
1852 static int
1853 ice_if_promisc_set(if_ctx_t ctx, int flags)
1854 {
1855 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1856 	struct ice_hw *hw = &sc->hw;
1857 	device_t dev = sc->dev;
1858 	int status;
1859 	bool promisc_enable = flags & IFF_PROMISC;
1860 	bool multi_enable = flags & IFF_ALLMULTI;
1861 	ice_declare_bitmap(promisc_mask, ICE_PROMISC_MAX);
1862 
1863 	/* Do not support configuration when in recovery mode */
1864 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1865 		return (ENOSYS);
1866 
1867 	ice_set_default_promisc_mask(promisc_mask);
1868 
1869 	if (multi_enable)
1870 		return (EOPNOTSUPP);
1871 
1872 	if (promisc_enable) {
1873 		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1874 					     promisc_mask, 0);
1875 		if (status && status != ICE_ERR_ALREADY_EXISTS) {
1876 			device_printf(dev,
1877 				      "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n",
1878 				      ice_status_str(status),
1879 				      ice_aq_str(hw->adminq.sq_last_status));
1880 			return (EIO);
1881 		}
1882 	} else {
1883 		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
1884 					       promisc_mask, 0);
1885 		if (status) {
1886 			device_printf(dev,
1887 				      "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n",
1888 				      ice_status_str(status),
1889 				      ice_aq_str(hw->adminq.sq_last_status));
1890 			return (EIO);
1891 		}
1892 	}
1893 
1894 	return (0);
1895 }
1896 
1897 /**
1898  * ice_if_media_change - Change device media
1899  * @ctx: device ctx structure
1900  *
1901  * Called by iflib when a media change is requested. This operation is not
1902  * supported by the hardware, so we just return an error code.
1903  */
1904 static int
1905 ice_if_media_change(if_ctx_t ctx)
1906 {
1907 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1908 
1909 	device_printf(sc->dev, "Media change is not supported.\n");
1910 	return (ENODEV);
1911 }
1912 
1913 /**
1914  * ice_if_media_status - Report current device media
1915  * @ctx: iflib context structure
1916  * @ifmr: ifmedia request structure to update
1917  *
1918  * Updates the provided ifmr with current device media status, including link
1919  * status and media type.
1920  */
1921 static void
1922 ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1923 {
1924 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1925 	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;
1926 
1927 	ifmr->ifm_status = IFM_AVALID;
1928 	ifmr->ifm_active = IFM_ETHER;
1929 
1930 	/* Never report link up or media types when in recovery mode */
1931 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1932 		return;
1933 
1934 	if (!sc->link_up)
1935 		return;
1936 
1937 	ifmr->ifm_status |= IFM_ACTIVE;
1938 	ifmr->ifm_active |= IFM_FDX;
1939 
1940 	if (li->phy_type_low)
1941 		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
1942 	else if (li->phy_type_high)
1943 		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
1944 	else
1945 		ifmr->ifm_active |= IFM_UNKNOWN;
1946 
1947 	/* Report flow control status as well */
1948 	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
1949 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1950 	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
1951 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1952 }
1953 
1954 /**
1955  * ice_init_tx_tracking - Initialize Tx queue software tracking values
1956  * @vsi: the VSI to initialize
1957  *
1958  * Initialize Tx queue software tracking values, including the Report Status
1959  * queue, and related software tracking values.
1960  */
1961 static void
1962 ice_init_tx_tracking(struct ice_vsi *vsi)
1963 {
1964 	struct ice_tx_queue *txq;
1965 	size_t j;
1966 	int i;
1967 
1968 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1969 
1970 		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;
1971 
1972 		/* Initialize the last processed descriptor to be the end of
1973 		 * the ring, rather than the start, so that we avoid an
1974 		 * off-by-one error in ice_ift_txd_credits_update for the
1975 		 * first packet.
1976 		 */
1977 		txq->tx_cidx_processed = txq->desc_count - 1;
1978 
1979 		for (j = 0; j < txq->desc_count; j++)
1980 			txq->tx_rsq[j] = QIDX_INVALID;
1981 	}
1982 }
1983 
1984 /**
1985  * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
1986  * @sc: the device softc
1987  *
1988  * Called to update the Rx queue mbuf_sz parameter for configuring the receive
1989  * buffer sizes when programming hardware.
1990  */
1991 static void
1992 ice_update_rx_mbuf_sz(struct ice_softc *sc)
1993 {
1994 	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
1995 	struct ice_vsi *vsi = &sc->pf_vsi;
1996 
1997 	MPASS(mbuf_sz <= UINT16_MAX);
1998 	vsi->mbuf_sz = mbuf_sz;
1999 }
2000 
2001 /**
2002  * ice_if_init - Initialize the device
2003  * @ctx: iflib ctx structure
2004  *
2005  * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
2006  * device filters and prepares the Tx and Rx engines.
2007  *
2008  * @pre assumes the caller holds the iflib CTX lock
2009  */
2010 static void
2011 ice_if_init(if_ctx_t ctx)
2012 {
2013 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
2014 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2015 	device_t dev = sc->dev;
2016 	int err;
2017 
2018 	ASSERT_CTX_LOCKED(sc);
2019 
2020 	/*
2021 	 * We've seen an issue with 11.3/12.1 where sideband routines are
2022 	 * called after detach is called.  This would call routines after
2023 	 * if_stop, causing issues with the teardown process.  This has
2024 	 * seemingly been fixed in STABLE snapshots, but it seems like a
2025 	 * good idea to have this guard here regardless.
2026 	 */
2027 	if (ice_driver_is_detaching(sc))
2028 		return;
2029 
2030 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2031 		return;
2032 
2033 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
2034 		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
2035 		return;
2036 	}
2037 
2038 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
2039 		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
2040 		return;
2041 	}
2042 
2043 	ice_update_rx_mbuf_sz(sc);
2044 
2045 	/* Update the MAC address... User might use a LAA */
2046 	err = ice_update_laa_mac(sc);
2047 	if (err) {
2048 		device_printf(dev,
2049 			      "LAA address change failed, err %s\n",
2050 			      ice_err_str(err));
2051 		return;
2052 	}
2053 
2054 	/* Initialize software Tx tracking values */
2055 	ice_init_tx_tracking(&sc->pf_vsi);
2056 
2057 	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
2058 	if (err) {
2059 		device_printf(dev,
2060 			      "Unable to configure the main VSI for Tx: %s\n",
2061 			      ice_err_str(err));
2062 		return;
2063 	}
2064 
2065 	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
2066 	if (err) {
2067 		device_printf(dev,
2068 			      "Unable to configure the main VSI for Rx: %s\n",
2069 			      ice_err_str(err));
2070 		goto err_cleanup_tx;
2071 	}
2072 
2073 	err = ice_control_all_rx_queues(&sc->pf_vsi, true);
2074 	if (err) {
2075 		device_printf(dev,
2076 			      "Unable to enable Rx rings for transmit: %s\n",
2077 			      ice_err_str(err));
2078 		goto err_cleanup_tx;
2079 	}
2080 
2081 	err = ice_cfg_pf_default_mac_filters(sc);
2082 	if (err) {
2083 		device_printf(dev,
2084 			      "Unable to configure default MAC filters: %s\n",
2085 			      ice_err_str(err));
2086 		goto err_stop_rx;
2087 	}
2088 
2089 	/* We use software interrupts for Tx, so we only program the hardware
2090 	 * interrupts for Rx.
2091 	 */
2092 	ice_configure_all_rxq_interrupts(&sc->pf_vsi);
2093 	ice_configure_rx_itr(&sc->pf_vsi);
2094 
2095 	/* Configure promiscuous mode */
2096 	ice_if_promisc_set(ctx, if_getflags(sc->ifp));
2097 
2098 	if (!ice_testandclear_state(&sc->state, ICE_STATE_FIRST_INIT_LINK))
2099 		if (!sc->link_up && ((if_getflags(sc->ifp) & IFF_UP) ||
2100 			 ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN)))
2101 			ice_set_link(sc, true);
2102 
2103 	ice_rdma_pf_init(sc);
2104 
2105 	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
2106 
2107 	if (sc->mirr_if && ice_testandclear_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
2108 		ice_clear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
2109 		iflib_request_reset(sc->mirr_if->subctx);
2110 		iflib_admin_intr_deferred(sc->mirr_if->subctx);
2111 	}
2112 
2113 	return;
2114 
2115 err_stop_rx:
2116 	ice_control_all_rx_queues(&sc->pf_vsi, false);
2117 err_cleanup_tx:
2118 	ice_vsi_disable_tx(&sc->pf_vsi);
2119 }
2120 
2121 /**
2122  * ice_poll_for_media_avail - Re-enable link if media is detected
2123  * @sc: device private structure
2124  *
2125  * Intended to be called from the driver's timer function, this function
2126  * sends the Get Link Status AQ command and re-enables HW link if the
2127  * command says that media is available.
2128  *
2129  * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing,
2130  * since media removal events are supposed to be sent to the driver through
2131  * a link status event.
2132  */
2133 static void
2134 ice_poll_for_media_avail(struct ice_softc *sc)
2135 {
2136 	struct ice_hw *hw = &sc->hw;
2137 	struct ice_port_info *pi = hw->port_info;
2138 
2139 	/* E830 only: There's no interrupt for when the PHY FW has finished loading,
2140 	 * so poll for the status in the media task here if it's previously
2141 	 * been detected that it's still loading.
2142 	 */
2143 	if (ice_is_e830(hw) &&
2144 	    ice_test_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING)) {
2145 		if (rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M)
2146 			ice_clear_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING);
2147 		else
2148 			return;
2149 	}
2150 
2151 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2152 		pi->phy.get_link_info = true;
2153 		ice_get_link_status(pi, &sc->link_up);
2154 
2155 		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2156 			int status;
2157 
2158 			/* Re-enable link and re-apply user link settings */
2159 			if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) ||
2160 			    (if_getflags(sc->ifp) & IFF_UP)) {
2161 				ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
2162 
2163 				/* Update the OS about changes in media capability */
2164 				status = ice_add_media_types(sc, sc->media);
2165 				if (status)
2166 					device_printf(sc->dev,
2167 					    "Error adding device media types: %s aq_err %s\n",
2168 					    ice_status_str(status),
2169 					    ice_aq_str(hw->adminq.sq_last_status));
2170 			}
2171 
2172 			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
2173 		}
2174 	}
2175 }
2176 
2177 /**
2178  * ice_if_timer - called by iflib periodically
2179  * @ctx: iflib ctx structure
2180  * @qid: the queue this timer was called for
2181  *
2182  * This callback is triggered by iflib periodically. We use it to update the
2183  * hw statistics.
2184  *
2185  * @remark this function is not protected by the iflib CTX lock.
2186  */
2187 static void
2188 ice_if_timer(if_ctx_t ctx, uint16_t qid)
2189 {
2190 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2191 	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;
2192 
2193 	if (qid != 0)
2194 		return;
2195 
2196 	/* Do not attempt to update stats when in recovery mode */
2197 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2198 		return;
2199 
2200 	/* Update device statistics */
2201 	ice_update_pf_stats(sc);
2202 
2203 	/*
2204 	 * For proper watchdog management, the iflib stack needs to know if
2205 	 * we've been paused during the last interval. Check if the
2206 	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
2207 	 */
2208 	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
2209 		sc->scctx->isc_pause_frames = 1;
2210 
2211 	/* Update the primary VSI stats */
2212 	ice_update_vsi_hw_stats(&sc->pf_vsi);
2213 
2214 	/* Update mirror VSI stats */
2215 	if (sc->mirr_if && sc->mirr_if->if_attached)
2216 		ice_update_vsi_hw_stats(sc->mirr_if->vsi);
2217 }
2218 
2219 /**
2220  * ice_admin_timer - called periodically to trigger the admin task
2221  * @arg: callout(9) argument pointing to the device private softc structure
2222  *
2223  * Timer function used as part of a callout(9) timer that will periodically
2224  * trigger the admin task, even when the interface is down.
2225  *
2226  * @remark this function is not called by iflib and is not protected by the
2227  * iflib CTX lock.
2228  *
2229  * @remark because this is a callout function, it cannot sleep and should not
2230  * attempt taking the iflib CTX lock.
2231  */
2232 static void
2233 ice_admin_timer(void *arg)
2234 {
2235 	struct ice_softc *sc = (struct ice_softc *)arg;
2236 
2237 	/*
2238 	 * There is a point where callout routines are no longer
2239 	 * cancelable.  So there exists a window of time where the
2240 	 * driver enters detach() and tries to cancel the callout, but the
2241 	 * callout routine has passed the cancellation point.  The detach()
2242 	 * routine is unaware of this and tries to free resources that the
2243 	 * callout routine needs.  So we check for the detach state flag to
2244 	 * at least shrink the window of opportunity.
2245 	 */
2246 	if (ice_driver_is_detaching(sc))
2247 		return;
2248 
2249 	/* Fire off the admin task */
2250 	iflib_admin_intr_deferred(sc->ctx);
2251 
2252 	/* Reschedule the admin timer */
2253 	callout_schedule(&sc->admin_timer, hz/2);
2254 }
2255 
2256 /**
2257  * ice_transition_recovery_mode - Transition to recovery mode
2258  * @sc: the device private softc
2259  *
2260  * Called when the driver detects that the firmware has entered recovery mode
2261  * at run time.
2262  */
2263 static void
2264 ice_transition_recovery_mode(struct ice_softc *sc)
2265 {
2266 	struct ice_vsi *vsi = &sc->pf_vsi;
2267 	int i;
2268 
2269 	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
2270 
2271 	/* Tell the stack that the link has gone down */
2272 	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
2273 
2274 	/* Request that the device be re-initialized */
2275 	ice_request_stack_reinit(sc);
2276 
2277 	ice_rdma_pf_detach(sc);
2278 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2279 
2280 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2281 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2282 
2283 	ice_vsi_del_txqs_ctx(vsi);
2284 	ice_vsi_del_rxqs_ctx(vsi);
2285 
2286 	for (i = 0; i < sc->num_available_vsi; i++) {
2287 		if (sc->all_vsi[i])
2288 			ice_release_vsi(sc->all_vsi[i]);
2289 	}
2290 	sc->num_available_vsi = 0;
2291 
2292 	if (sc->all_vsi) {
2293 		free(sc->all_vsi, M_ICE);
2294 		sc->all_vsi = NULL;
2295 	}
2296 
2297 	/* Destroy the interrupt manager */
2298 	ice_resmgr_destroy(&sc->dev_imgr);
2299 	/* Destroy the queue managers */
2300 	ice_resmgr_destroy(&sc->tx_qmgr);
2301 	ice_resmgr_destroy(&sc->rx_qmgr);
2302 
2303 	ice_deinit_hw(&sc->hw);
2304 }
2305 
2306 /**
2307  * ice_transition_safe_mode - Transition to safe mode
2308  * @sc: the device private softc
2309  *
2310  * Called when the driver attempts to reload the DDP package during a device
2311  * reset, and the new download fails. If so, we must transition to safe mode
2312  * at run time.
2313  *
2314  * @remark although safe mode normally allocates only a single queue, we can't
2315  * change the number of queues dynamically when using iflib. Due to this, we
2316  * do not attempt to reduce the number of queues.
2317  */
2318 static void
2319 ice_transition_safe_mode(struct ice_softc *sc)
2320 {
2321 	/* Indicate that we are in Safe mode */
2322 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
2323 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
2324 
2325 	ice_rdma_pf_detach(sc);
2326 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2327 
2328 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2329 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2330 
2331 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2332 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
2333 }
2334 
2335 /**
2336  * ice_if_update_admin_status - update admin status
2337  * @ctx: iflib ctx structure
2338  *
2339  * Called by iflib to update the admin status. For our purposes, this means
2340  * check the adminq, and update the link status. It's ultimately triggered by
2341  * our admin interrupt, or by the ice_if_timer periodically.
2342  *
2343  * @pre assumes the caller holds the iflib CTX lock
2344  */
2345 static void
2346 ice_if_update_admin_status(if_ctx_t ctx)
2347 {
2348 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2349 	enum ice_fw_modes fw_mode;
2350 	bool reschedule = false;
2351 	u16 pending = 0;
2352 
2353 	ASSERT_CTX_LOCKED(sc);
2354 
2355 	/* Check if the firmware entered recovery mode at run time */
2356 	fw_mode = ice_get_fw_mode(&sc->hw);
2357 	if (fw_mode == ICE_FW_MODE_REC) {
2358 		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2359 			/* If we just entered recovery mode, log a warning to
2360 			 * the system administrator and deinit driver state
2361 			 * that is no longer functional.
2362 			 */
2363 			ice_transition_recovery_mode(sc);
2364 		}
2365 	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
2366 		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
2367 			/* Rollback mode isn't fatal, but we don't want to
2368 			 * repeatedly post a message about it.
2369 			 */
2370 			ice_print_rollback_msg(&sc->hw);
2371 		}
2372 	}
2373 
2374 	/* Handle global reset events */
2375 	ice_handle_reset_event(sc);
2376 
2377 	/* Handle PF reset requests */
2378 	ice_handle_pf_reset_request(sc);
2379 
2380 	/* Handle MDD events */
2381 	ice_handle_mdd_event(sc);
2382 
2383 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
2384 	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
2385 	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2386 		/*
2387 		 * If we know the control queues are disabled, skip processing
2388 		 * the control queues entirely.
2389 		 */
2390 		;
2391 	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
2392 		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
2393 		if (pending > 0)
2394 			reschedule = true;
2395 
2396 		if (ice_is_generic_mac(&sc->hw)) {
2397 			ice_process_ctrlq(sc, ICE_CTL_Q_SB, &pending);
2398 			if (pending > 0)
2399 				reschedule = true;
2400 		}
2401 
2402 		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
2403 		if (pending > 0)
2404 			reschedule = true;
2405 	}
2406 
2407 	/* Poll for link up */
2408 	ice_poll_for_media_avail(sc);
2409 
2410 	/* Check and update link status */
2411 	ice_update_link_status(sc, false);
2412 
2413 	/*
2414 	 * If there are still messages to process, we need to reschedule
2415 	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
2416 	 * woken up at the next interrupt or timer event.
2417 	 */
2418 	if (reschedule) {
2419 		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
2420 		iflib_admin_intr_deferred(ctx);
2421 	} else {
2422 		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2423 	}
2424 }
2425 
2426 /**
2427  * ice_prepare_for_reset - Prepare device for an impending reset
2428  * @sc: The device private softc
2429  *
2430  * Prepare the driver for an impending reset, shutting down VSIs, clearing the
2431  * scheduler setup, and shutting down controlqs. Uses the
2432  * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
2433  * driver for reset or not.
2434  */
2435 static void
2436 ice_prepare_for_reset(struct ice_softc *sc)
2437 {
2438 	struct ice_hw *hw = &sc->hw;
2439 
2440 	/* If we're already prepared, there's nothing to do */
2441 	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
2442 		return;
2443 
2444 	log(LOG_INFO, "%s: preparing to reset device logic\n", if_name(sc->ifp));
2445 
2446 	/* In recovery mode, hardware is not initialized */
2447 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2448 		return;
2449 
2450 	/* inform the RDMA client */
2451 	ice_rdma_notify_reset(sc);
2452 	/* stop the RDMA client */
2453 	ice_rdma_pf_stop(sc);
2454 
2455 	/* Release the main PF VSI queue mappings */
2456 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2457 				    sc->pf_vsi.num_tx_queues);
2458 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2459 				    sc->pf_vsi.num_rx_queues);
2460 	if (sc->mirr_if) {
2461 		ice_resmgr_release_map(&sc->tx_qmgr, sc->mirr_if->vsi->tx_qmap,
2462 		    sc->mirr_if->num_irq_vectors);
2463 		ice_resmgr_release_map(&sc->rx_qmgr, sc->mirr_if->vsi->rx_qmap,
2464 		    sc->mirr_if->num_irq_vectors);
2465 	}
2466 
2467 	ice_clear_hw_tbls(hw);
2468 
2469 	if (hw->port_info)
2470 		ice_sched_cleanup_all(hw);
2471 
2472 	ice_shutdown_all_ctrlq(hw, false);
2473 }
2474 
2475 /**
2476  * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
2477  * @sc: the device softc pointer
2478  *
2479  * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
2480  * mapping after a reset occurred.
2481  */
2482 static int
2483 ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
2484 {
2485 	struct ice_vsi *vsi = &sc->pf_vsi;
2486 	struct ice_tx_queue *txq;
2487 	struct ice_rx_queue *rxq;
2488 	int err, i;
2489 
2490 	/* Re-assign Tx queues from PF space to the main VSI */
2491 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
2492 					    vsi->num_tx_queues);
2493 	if (err) {
2494 		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
2495 			      ice_err_str(err));
2496 		return (err);
2497 	}
2498 
2499 	/* Re-assign Rx queues from PF space to this VSI */
2500 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
2501 					    vsi->num_rx_queues);
2502 	if (err) {
2503 		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
2504 			      ice_err_str(err));
2505 		goto err_release_tx_queues;
2506 	}
2507 
2508 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
2509 
2510 	/* Re-assign Tx queue tail pointers */
2511 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
2512 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
2513 
2514 	/* Re-assign Rx queue tail pointers */
2515 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
2516 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
2517 
2518 	return (0);
2519 
2520 err_release_tx_queues:
2521 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2522 				   sc->pf_vsi.num_tx_queues);
2523 
2524 	return (err);
2525 }
2526 
/* Non-zero when the interface behind the iflib context has IFF_DRV_RUNNING set */
#define CTX_ACTIVE(ctx) \
	(if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
2529 
2530 /**
2531  * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
2532  * @sc: The device private softc
2533  *
2534  * Handle a driver rebuild while in recovery mode. This will only rebuild the
2535  * limited functionality supported while in recovery mode.
2536  */
2537 static void
2538 ice_rebuild_recovery_mode(struct ice_softc *sc)
2539 {
2540 	device_t dev = sc->dev;
2541 
2542 	/* enable PCIe bus master */
2543 	pci_enable_busmaster(dev);
2544 
2545 	/* Configure interrupt causes for the administrative interrupt */
2546 	ice_configure_misc_interrupts(sc);
2547 
2548 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2549 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2550 
2551 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2552 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2553 
2554 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2555 
2556 	/* In order to completely restore device functionality, the iflib core
2557 	 * needs to be reset. We need to request an iflib reset. Additionally,
2558 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2559 	 * the iflib core, we also want re-run the admin task so that iflib
2560 	 * resets immediately instead of waiting for the next interrupt.
2561 	 */
2562 	ice_request_stack_reinit(sc);
2563 
2564 	return;
2565 }
2566 
2567 /**
2568  * ice_rebuild - Rebuild driver state post reset
2569  * @sc: The device private softc
2570  *
2571  * Restore driver state after a reset occurred. Restart the controlqs, setup
2572  * the hardware port, and re-enable the VSIs.
2573  */
2574 static void
2575 ice_rebuild(struct ice_softc *sc)
2576 {
2577 	struct ice_hw *hw = &sc->hw;
2578 	device_t dev = sc->dev;
2579 	enum ice_ddp_state pkg_state;
2580 	int status;
2581 	int err;
2582 
2583 	sc->rebuild_ticks = ticks;
2584 
2585 	/* If we're rebuilding, then a reset has succeeded. */
2586 	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);
2587 
2588 	/*
2589 	 * If the firmware is in recovery mode, only restore the limited
2590 	 * functionality supported by recovery mode.
2591 	 */
2592 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2593 		ice_rebuild_recovery_mode(sc);
2594 		return;
2595 	}
2596 
2597 	/* enable PCIe bus master */
2598 	pci_enable_busmaster(dev);
2599 
2600 	status = ice_init_all_ctrlq(hw);
2601 	if (status) {
2602 		device_printf(dev, "failed to re-init controlqs, err %s\n",
2603 			      ice_status_str(status));
2604 		goto err_shutdown_ctrlq;
2605 	}
2606 
2607 	/* Query the allocated resources for Tx scheduler */
2608 	status = ice_sched_query_res_alloc(hw);
2609 	if (status) {
2610 		device_printf(dev,
2611 			      "Failed to query scheduler resources, err %s aq_err %s\n",
2612 			      ice_status_str(status),
2613 			      ice_aq_str(hw->adminq.sq_last_status));
2614 		goto err_shutdown_ctrlq;
2615 	}
2616 
2617 	/* Re-enable FW logging. Keep going even if this fails */
2618 	status = ICE_SUCCESS;
2619 	if (hw->pf_id == 0)
2620 		status = ice_fwlog_set(hw, &hw->fwlog_cfg);
2621 	if (!status) {
2622 		/*
2623 		 * We should have the most updated cached copy of the
2624 		 * configuration, regardless of whether we're rebuilding
2625 		 * or not.  So we'll simply check to see if logging was
2626 		 * enabled pre-rebuild.
2627 		 */
2628 		if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2629 			status = ice_fwlog_register(hw);
2630 			if (status)
2631 				device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n",
2632 				   ice_status_str(status),
2633 				   ice_aq_str(hw->adminq.sq_last_status));
2634 		}
2635 	} else
2636 		device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n",
2637 		   ice_status_str(status),
2638 		   ice_aq_str(hw->adminq.sq_last_status));
2639 
2640 	err = ice_send_version(sc);
2641 	if (err)
2642 		goto err_shutdown_ctrlq;
2643 
2644 	err = ice_init_link_events(sc);
2645 	if (err) {
2646 		device_printf(dev, "ice_init_link_events failed: %s\n",
2647 			      ice_err_str(err));
2648 		goto err_shutdown_ctrlq;
2649 	}
2650 
2651 	status = ice_clear_pf_cfg(hw);
2652 	if (status) {
2653 		device_printf(dev, "failed to clear PF configuration, err %s\n",
2654 			      ice_status_str(status));
2655 		goto err_shutdown_ctrlq;
2656 	}
2657 
2658 	ice_clean_all_vsi_rss_cfg(sc);
2659 
2660 	ice_clear_pxe_mode(hw);
2661 
2662 	status = ice_get_caps(hw);
2663 	if (status) {
2664 		device_printf(dev, "failed to get capabilities, err %s\n",
2665 			      ice_status_str(status));
2666 		goto err_shutdown_ctrlq;
2667 	}
2668 
2669 	status = ice_sched_init_port(hw->port_info);
2670 	if (status) {
2671 		device_printf(dev, "failed to initialize port, err %s\n",
2672 			      ice_status_str(status));
2673 		goto err_sched_cleanup;
2674 	}
2675 
2676 	/* If we previously loaded the package, it needs to be reloaded now */
2677 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
2678 		pkg_state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
2679 		if (!ice_is_init_pkg_successful(pkg_state)) {
2680 			ice_log_pkg_init(sc, pkg_state);
2681 			ice_transition_safe_mode(sc);
2682 		}
2683 	}
2684 
2685 	ice_reset_pf_stats(sc);
2686 
2687 	err = ice_rebuild_pf_vsi_qmap(sc);
2688 	if (err) {
2689 		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
2690 			      ice_err_str(err));
2691 		goto err_sched_cleanup;
2692 	}
2693 	err = ice_initialize_vsi(&sc->pf_vsi);
2694 	if (err) {
2695 		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
2696 			      ice_err_str(err));
2697 		goto err_release_queue_allocations;
2698 	}
2699 
2700 	/* Replay all VSI configuration */
2701 	err = ice_replay_all_vsi_cfg(sc);
2702 	if (err)
2703 		goto err_deinit_pf_vsi;
2704 
2705 	/* Re-enable FW health event reporting */
2706 	ice_init_health_events(sc);
2707 
2708 	/* Reconfigure the main PF VSI for RSS */
2709 	err = ice_config_rss(&sc->pf_vsi);
2710 	if (err) {
2711 		device_printf(sc->dev,
2712 			      "Unable to reconfigure RSS for the main VSI, err %s\n",
2713 			      ice_err_str(err));
2714 		goto err_deinit_pf_vsi;
2715 	}
2716 
2717 	if (hw->port_info->qos_cfg.is_sw_lldp)
2718 		ice_add_rx_lldp_filter(sc);
2719 
2720 	/* Apply previous link settings and refresh link status, if PHY
2721 	 * FW is ready.
2722 	 */
2723 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2724 	ice_init_link(sc);
2725 
2726 	/* RDMA interface will be restarted by the stack re-init */
2727 
2728 	/* Configure interrupt causes for the administrative interrupt */
2729 	ice_configure_misc_interrupts(sc);
2730 
2731 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2732 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2733 
2734 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2735 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2736 
2737 	/* Reconfigure the subinterface */
2738 	if (sc->mirr_if) {
2739 		err = ice_subif_rebuild(sc);
2740 		if (err)
2741 			goto err_deinit_pf_vsi;
2742 	}
2743 
2744 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2745 
2746 	/* In order to completely restore device functionality, the iflib core
2747 	 * needs to be reset. We need to request an iflib reset. Additionally,
2748 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2749 	 * the iflib core, we also want re-run the admin task so that iflib
2750 	 * resets immediately instead of waiting for the next interrupt.
2751 	 * If LLDP is enabled we need to reconfig DCB to properly reinit all TC
2752 	 * queues, not only 0. It contains ice_request_stack_reinit as well.
2753 	 */
2754 	if (hw->port_info->qos_cfg.is_sw_lldp)
2755 		ice_request_stack_reinit(sc);
2756 	else
2757 		ice_do_dcb_reconfig(sc, false);
2758 
2759 	return;
2760 
2761 err_deinit_pf_vsi:
2762 	ice_deinit_vsi(&sc->pf_vsi);
2763 err_release_queue_allocations:
2764 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2765 				    sc->pf_vsi.num_tx_queues);
2766 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2767 				    sc->pf_vsi.num_rx_queues);
2768 err_sched_cleanup:
2769 	ice_sched_cleanup_all(hw);
2770 err_shutdown_ctrlq:
2771 	ice_shutdown_all_ctrlq(hw, false);
2772 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2773 	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2774 	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
2775 }
2776 
2777 /**
2778  * ice_handle_reset_event - Handle reset events triggered by OICR
2779  * @sc: The device private softc
2780  *
2781  * Handle reset events triggered by an OICR notification. This includes CORER,
2782  * GLOBR, and EMPR resets triggered by software on this or any other PF or by
2783  * firmware.
2784  *
2785  * @pre assumes the iflib context lock is held, and will unlock it while
2786  * waiting for the hardware to finish reset.
2787  */
2788 static void
2789 ice_handle_reset_event(struct ice_softc *sc)
2790 {
2791 	struct ice_hw *hw = &sc->hw;
2792 	int status;
2793 	device_t dev = sc->dev;
2794 
2795 	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
2796 	 * trigger an OICR interrupt. Our OICR handler will determine when
2797 	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
2798 	 * appropriate.
2799 	 */
2800 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
2801 		return;
2802 
2803 	ice_prepare_for_reset(sc);
2804 
2805 	/*
2806 	 * Release the iflib context lock and wait for the device to finish
2807 	 * resetting.
2808 	 */
2809 	IFLIB_CTX_UNLOCK(sc);
2810 
2811 #define ICE_EMPR_ADDL_WAIT_MSEC_SLOW		20000
2812 	if ((ice_is_e830(hw) || ice_is_e825c(hw)) &&
2813 	    (((rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
2814 	         GLGEN_RSTAT_RESET_TYPE_S) == ICE_RESET_EMPR))
2815 			ice_msec_pause(ICE_EMPR_ADDL_WAIT_MSEC_SLOW);
2816 
2817 	status = ice_check_reset(hw);
2818 	IFLIB_CTX_LOCK(sc);
2819 	if (status) {
2820 		device_printf(dev, "Device never came out of reset, err %s\n",
2821 			      ice_status_str(status));
2822 
2823 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2824 		ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
2825 		ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2826 		device_printf(dev, "Reset failed; please reload the device driver\n");
2827 		return;
2828 	}
2829 
2830 	/* We're done with the reset, so we can rebuild driver state */
2831 	sc->hw.reset_ongoing = false;
2832 	ice_rebuild(sc);
2833 
2834 	/* In the unlikely event that a PF reset request occurs at the same
2835 	 * time as a global reset, clear the request now. This avoids
2836 	 * resetting a second time right after we reset due to a global event.
2837 	 */
2838 	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2839 		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
2840 }
2841 
2842 /**
2843  * ice_handle_pf_reset_request - Initiate PF reset requested by software
2844  * @sc: The device private softc
2845  *
2846  * Initiate a PF reset requested by software. We handle this in the admin task
2847  * so that only one thread actually handles driver preparation and cleanup,
2848  * rather than having multiple threads possibly attempt to run this code
2849  * simultaneously.
2850  *
2851  * @pre assumes the iflib context lock is held and will unlock it while
2852  * waiting for the PF reset to complete.
2853  */
2854 static void
2855 ice_handle_pf_reset_request(struct ice_softc *sc)
2856 {
2857 	struct ice_hw *hw = &sc->hw;
2858 	int status;
2859 
2860 	/* Check for PF reset requests */
2861 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2862 		return;
2863 
2864 	/* Make sure we're prepared for reset */
2865 	ice_prepare_for_reset(sc);
2866 
2867 	/*
2868 	 * Release the iflib context lock and wait for the device to finish
2869 	 * resetting.
2870 	 */
2871 	IFLIB_CTX_UNLOCK(sc);
2872 	status = ice_reset(hw, ICE_RESET_PFR);
2873 	IFLIB_CTX_LOCK(sc);
2874 	if (status) {
2875 		device_printf(sc->dev, "device PF reset failed, err %s\n",
2876 			      ice_status_str(status));
2877 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2878 		return;
2879 	}
2880 
2881 	sc->soft_stats.pfr_count++;
2882 	ice_rebuild(sc);
2883 }
2884 
2885 /**
2886  * ice_init_device_features - Init device driver features
2887  * @sc: driver softc structure
2888  *
2889  * @pre assumes that the function capabilities bits have been set up by
2890  * ice_init_hw().
2891  */
2892 static void
2893 ice_init_device_features(struct ice_softc *sc)
2894 {
2895 	struct ice_hw *hw = &sc->hw;
2896 
2897 	/* Set capabilities that all devices support */
2898 	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2899 	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
2900 	ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2901 	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
2902 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
2903 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
2904 	ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2905 	ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2906 	ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap);
2907 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_cap);
2908 	ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2909 	ice_set_bit(ICE_FEATURE_PHY_STATISTICS, sc->feat_cap);
2910 
2911 	if (ice_is_e810(hw))
2912 		ice_set_bit(ICE_FEATURE_PHY_STATISTICS, sc->feat_en);
2913 
2914 	if (ice_is_e825c(hw))
2915 		ice_set_bit(ICE_FEATURE_DUAL_NAC, sc->feat_cap);
2916 	/* Disable features due to hardware limitations... */
2917 	if (!hw->func_caps.common_cap.rss_table_size)
2918 		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2919 	if (!hw->func_caps.common_cap.iwarp || !ice_enable_irdma)
2920 		ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2921 	if (!hw->func_caps.common_cap.dcb)
2922 		ice_clear_bit(ICE_FEATURE_DCB, sc->feat_cap);
2923 	/* Disable features due to firmware limitations... */
2924 	if (!ice_is_fw_health_report_supported(hw))
2925 		ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2926 	if (!ice_fwlog_supported(hw))
2927 		ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2928 	if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2929 		if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING))
2930 			ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en);
2931 		else
2932 			ice_fwlog_unregister(hw);
2933 	}
2934 
2935 	/* Disable capabilities not supported by the OS */
2936 	ice_disable_unsupported_features(sc->feat_cap);
2937 
2938 	/* RSS is always enabled for iflib */
2939 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
2940 		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
2941 
2942 	/* Disable features based on sysctl settings */
2943 	if (!ice_tx_balance_en)
2944 		ice_clear_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2945 
2946 	if (hw->dev_caps.supported_sensors & ICE_SENSOR_SUPPORT_E810_INT_TEMP) {
2947 		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_cap);
2948 		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_en);
2949 	}
2950 
2951 	if (hw->func_caps.common_cap.next_cluster_id_support ||
2952 	    hw->dev_caps.common_cap.next_cluster_id_support) {
2953 		ice_set_bit(ICE_FEATURE_NEXT_CLUSTER_ID, sc->feat_cap);
2954 		ice_set_bit(ICE_FEATURE_NEXT_CLUSTER_ID, sc->feat_en);
2955 	}
2956 }
2957 
2958 /**
2959  * ice_if_multi_set - Callback to update Multicast filters in HW
2960  * @ctx: iflib ctx structure
2961  *
2962  * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
2963  * the if_multiaddrs list and determine which filters have been added or
2964  * removed from the list, and update HW programming to reflect the new list.
2965  *
2966  * @pre assumes the caller holds the iflib CTX lock
2967  */
2968 static void
2969 ice_if_multi_set(if_ctx_t ctx)
2970 {
2971 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2972 	int err;
2973 
2974 	ASSERT_CTX_LOCKED(sc);
2975 
2976 	/* Do not handle multicast configuration in recovery mode */
2977 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2978 		return;
2979 
2980 	err = ice_sync_multicast_filters(sc);
2981 	if (err) {
2982 		device_printf(sc->dev,
2983 			      "Failed to synchronize multicast filter list: %s\n",
2984 			      ice_err_str(err));
2985 		return;
2986 	}
2987 }
2988 
2989 /**
2990  * ice_if_vlan_register - Register a VLAN with the hardware
2991  * @ctx: iflib ctx pointer
2992  * @vtag: VLAN to add
2993  *
2994  * Programs the main PF VSI with a hardware filter for the given VLAN.
2995  *
2996  * @pre assumes the caller holds the iflib CTX lock
2997  */
2998 static void
2999 ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
3000 {
3001 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3002 	int status;
3003 
3004 	ASSERT_CTX_LOCKED(sc);
3005 
3006 	/* Do not handle VLAN configuration in recovery mode */
3007 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
3008 		return;
3009 
3010 	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
3011 	if (status) {
3012 		device_printf(sc->dev,
3013 			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
3014 			      vtag, ice_status_str(status),
3015 			      ice_aq_str(sc->hw.adminq.sq_last_status));
3016 	}
3017 }
3018 
3019 /**
3020  * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
3021  * @ctx: iflib ctx pointer
3022  * @vtag: VLAN to add
3023  *
3024  * Removes the previously programmed VLAN filter from the main PF VSI.
3025  *
3026  * @pre assumes the caller holds the iflib CTX lock
3027  */
3028 static void
3029 ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
3030 {
3031 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3032 	int status;
3033 
3034 	ASSERT_CTX_LOCKED(sc);
3035 
3036 	/* Do not handle VLAN configuration in recovery mode */
3037 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
3038 		return;
3039 
3040 	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
3041 	if (status) {
3042 		device_printf(sc->dev,
3043 			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
3044 			      vtag, ice_status_str(status),
3045 			      ice_aq_str(sc->hw.adminq.sq_last_status));
3046 	}
3047 }
3048 
3049 /**
3050  * ice_if_stop - Stop the device
3051  * @ctx: iflib context structure
3052  *
3053  * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
3054  * down)
3055  *
3056  * @pre assumes the caller holds the iflib CTX lock
3057  */
3058 static void
3059 ice_if_stop(if_ctx_t ctx)
3060 {
3061 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3062 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3063 
3064 	ASSERT_CTX_LOCKED(sc);
3065 
3066 	/*
3067 	 * The iflib core may call IFDI_STOP prior to the first call to
3068 	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
3069 	 * don't have, and disable Tx queues which aren't yet configured.
3070 	 * Although it is likely these extra operations are harmless, they do
3071 	 * cause spurious warning messages to be displayed, which may confuse
3072 	 * users.
3073 	 *
3074 	 * To avoid these messages, we use a state bit indicating if we've
3075 	 * been initialized. It will be set when ice_if_init is called, and
3076 	 * cleared here in ice_if_stop.
3077 	 */
3078 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
3079 		return;
3080 
3081 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
3082 		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
3083 		return;
3084 	}
3085 
3086 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
3087 		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
3088 		return;
3089 	}
3090 
3091 	ice_rdma_pf_stop(sc);
3092 
3093 	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
3094 	 * return of these functions because there's nothing we can really do
3095 	 * if they fail, and the functions already print error messages.
3096 	 * Just try to shut down as much as we can.
3097 	 */
3098 	ice_rm_pf_default_mac_filters(sc);
3099 
3100 	/* Dissociate the Tx and Rx queues from the interrupts */
3101 	ice_flush_txq_interrupts(&sc->pf_vsi);
3102 	ice_flush_rxq_interrupts(&sc->pf_vsi);
3103 
3104 	/* Disable the Tx and Rx queues */
3105 	ice_vsi_disable_tx(&sc->pf_vsi);
3106 	ice_control_all_rx_queues(&sc->pf_vsi, false);
3107 
3108 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
3109 		 !(if_getflags(sc->ifp) & IFF_UP) && sc->link_up)
3110 		ice_set_link(sc, false);
3111 
3112 	if (sc->mirr_if && ice_test_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
3113 		ice_subif_if_stop(sc->mirr_if->subctx);
3114 		device_printf(sc->dev, "The subinterface also comes down and up after reset\n");
3115 	}
3116 }
3117 
3118 /**
3119  * ice_if_get_counter - Get current value of an ifnet statistic
3120  * @ctx: iflib context pointer
3121  * @counter: ifnet counter to read
3122  *
3123  * Reads the current value of an ifnet counter for the device.
3124  *
3125  * This function is not protected by the iflib CTX lock.
3126  */
3127 static uint64_t
3128 ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
3129 {
3130 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3131 
3132 	/* Return the counter for the main PF VSI */
3133 	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
3134 }
3135 
3136 /**
3137  * ice_request_stack_reinit - Request that iflib re-initialize
3138  * @sc: the device private softc
3139  *
3140  * Request that the device be brought down and up, to re-initialize. For
3141  * example, this may be called when a device reset occurs, or when Tx and Rx
3142  * queues need to be re-initialized.
3143  *
3144  * This is required because the iflib state is outside the driver, and must be
3145  * re-initialized if we need to resart Tx and Rx queues.
3146  */
3147 void
3148 ice_request_stack_reinit(struct ice_softc *sc)
3149 {
3150 	if (CTX_ACTIVE(sc->ctx)) {
3151 		iflib_request_reset(sc->ctx);
3152 		iflib_admin_intr_deferred(sc->ctx);
3153 	}
3154 }
3155 
3156 /**
3157  * ice_driver_is_detaching - Check if the driver is detaching/unloading
3158  * @sc: device private softc
3159  *
3160  * Returns true if the driver is detaching, false otherwise.
3161  *
3162  * @remark on newer kernels, take advantage of iflib_in_detach in order to
3163  * report detachment correctly as early as possible.
3164  *
3165  * @remark this function is used by various code paths that want to avoid
3166  * running if the driver is about to be removed. This includes sysctls and
3167  * other driver access points. Note that it does not fully resolve
3168  * detach-based race conditions as it is possible for a thread to race with
3169  * iflib_in_detach.
3170  */
3171 bool
3172 ice_driver_is_detaching(struct ice_softc *sc)
3173 {
3174 	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
3175 		iflib_in_detach(sc->ctx));
3176 }
3177 
3178 /**
3179  * ice_if_priv_ioctl - Device private ioctl handler
3180  * @ctx: iflib context pointer
3181  * @command: The ioctl command issued
3182  * @data: ioctl specific data
3183  *
3184  * iflib callback for handling custom driver specific ioctls.
3185  *
3186  * @pre Assumes that the iflib context lock is held.
3187  */
3188 static int
3189 ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
3190 {
3191 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3192 	struct ifdrv *ifd;
3193 	device_t dev = sc->dev;
3194 
3195 	if (data == NULL)
3196 		return (EINVAL);
3197 
3198 	ASSERT_CTX_LOCKED(sc);
3199 
3200 	/* Make sure the command type is valid */
3201 	switch (command) {
3202 	case SIOCSDRVSPEC:
3203 	case SIOCGDRVSPEC:
3204 		/* Accepted commands */
3205 		break;
3206 	case SIOCGPRIVATE_0:
3207 		/*
3208 		 * Although we do not support this ioctl command, it's
3209 		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
3210 		 * handler. Do not print a message in this case
3211 		 */
3212 		return (ENOTSUP);
3213 	default:
3214 		/*
3215 		 * If we get a different command for this function, it's
3216 		 * definitely unexpected, so log a message indicating what
3217 		 * command we got for debugging purposes.
3218 		 */
3219 		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
3220 			      __func__, command);
3221 		return (EINVAL);
3222 	}
3223 
3224 	ifd = (struct ifdrv *)data;
3225 
3226 	switch (ifd->ifd_cmd) {
3227 	case ICE_NVM_ACCESS:
3228 		return ice_handle_nvm_access_ioctl(sc, ifd);
3229 	case ICE_DEBUG_DUMP:
3230 		return ice_handle_debug_dump_ioctl(sc, ifd);
3231 	default:
3232 		return EINVAL;
3233 	}
3234 }
3235 
3236 /**
3237  * ice_if_i2c_req - I2C request handler for iflib
3238  * @ctx: iflib context pointer
3239  * @req: The I2C parameters to use
3240  *
3241  * Read from the port's I2C eeprom using the parameters from the ioctl.
3242  *
3243  * @remark The iflib-only part is pretty simple.
3244  */
3245 static int
3246 ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
3247 {
3248 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3249 
3250 	return ice_handle_i2c_req(sc, req);
3251 }
3252 
3253 /**
3254  * ice_if_suspend - PCI device suspend handler for iflib
3255  * @ctx: iflib context pointer
3256  *
3257  * Deinitializes the driver and clears HW resources in preparation for
3258  * suspend or an FLR.
3259  *
3260  * @returns 0; this return value is ignored
3261  */
3262 static int
3263 ice_if_suspend(if_ctx_t ctx)
3264 {
3265 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3266 
3267 	/* At least a PFR is always going to happen after this;
3268 	 * either via FLR or during the D3->D0 transition.
3269 	 */
3270 	ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
3271 
3272 	ice_prepare_for_reset(sc);
3273 
3274 	return (0);
3275 }
3276 
3277 /**
3278  * ice_if_resume - PCI device resume handler for iflib
3279  * @ctx: iflib context pointer
3280  *
3281  * Reinitializes the driver and the HW after PCI resume or after
3282  * an FLR. An init is performed by iflib after this function is finished.
3283  *
3284  * @returns 0; this return value is ignored
3285  */
3286 static int
3287 ice_if_resume(if_ctx_t ctx)
3288 {
3289 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3290 
3291 	ice_rebuild(sc);
3292 
3293 	return (0);
3294 }
3295 
3296 /**
3297  * ice_if_needs_restart - Tell iflib when the driver needs to be reinitialized
3298  * @ctx: iflib context pointer
3299  * @event: event code to check
3300  *
3301  * Defaults to returning true for unknown events.
3302  *
3303  * @returns true if iflib needs to reinit the interface
3304  */
3305 static bool
3306 ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event)
3307 {
3308 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3309 
3310 	switch (event) {
3311 	case IFLIB_RESTART_VLAN_CONFIG:
3312 		if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
3313 			 !(if_getflags(sc->ifp) & IFF_UP))
3314 			return false;
3315 	default:
3316 		return true;
3317 	}
3318 }
3319 
3320 /**
3321  * ice_init_link - Do link configuration and link status reporting
3322  * @sc: driver private structure
3323  *
3324  * Contains an extra check that skips link config when an E830 device
3325  * does not have the "FW_LOADING"/"PHYBUSY" bit set in GL_MNG_FWSM set.
3326  */
3327 static void
3328 ice_init_link(struct ice_softc *sc)
3329 {
3330 	struct ice_hw *hw = &sc->hw;
3331 	device_t dev = sc->dev;
3332 
3333 	/* Check if FW is ready before setting up link; defer setup to the
3334 	 * admin task if it isn't.
3335 	 */
3336 	if (ice_is_e830(hw) &&
3337 	    (rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M)) {
3338 		ice_set_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING);
3339 		device_printf(dev,
3340 		    "Link initialization is blocked by PHY FW initialization.\n");
3341 		device_printf(dev,
3342 		    "Link initialization will continue after PHY FW initialization completes.\n");
3343 		/* Do not access PHY config while PHY FW is busy initializing */
3344 	} else {
3345 		ice_clear_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING);
3346 		ice_init_link_configuration(sc);
3347 		ice_update_link_status(sc, true);
3348 	}
3349 
3350 }
3351 
3352 extern struct if_txrx ice_subif_txrx;
3353 
3354 /**
3355  * @var ice_subif_methods
3356  * @brief ice driver method entry points
3357  */
3358 static device_method_t ice_subif_methods[] = {
3359 	/* Device interface */
3360 	DEVMETHOD(device_register, ice_subif_register),
3361 	DEVMETHOD_END
3362 };
3363 
3364 /**
3365  * @var ice_subif_driver
3366  * @brief driver structure for the device API
3367  */
3368 static driver_t ice_subif_driver = {
3369 	.name = "ice_subif",
3370 	.methods = ice_subif_methods,
3371 	.size = sizeof(struct ice_mirr_if),
3372 };
3373 
3374 static device_method_t ice_iflib_subif_methods[] = {
3375 	DEVMETHOD(ifdi_attach_pre, ice_subif_if_attach_pre),
3376 	DEVMETHOD(ifdi_attach_post, ice_subif_if_attach_post),
3377 	DEVMETHOD(ifdi_tx_queues_alloc, ice_subif_if_tx_queues_alloc),
3378 	DEVMETHOD(ifdi_rx_queues_alloc, ice_subif_if_rx_queues_alloc),
3379 	DEVMETHOD(ifdi_msix_intr_assign, ice_subif_if_msix_intr_assign),
3380 	DEVMETHOD(ifdi_intr_enable, ice_subif_if_intr_enable),
3381 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_subif_if_rx_queue_intr_enable),
3382 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_subif_if_tx_queue_intr_enable),
3383 	DEVMETHOD(ifdi_init, ice_subif_if_init),
3384 	DEVMETHOD(ifdi_stop, ice_subif_if_stop),
3385 	DEVMETHOD(ifdi_queues_free, ice_subif_if_queues_free),
3386 	DEVMETHOD(ifdi_media_status, ice_subif_if_media_status),
3387 	DEVMETHOD(ifdi_promisc_set, ice_subif_if_promisc_set),
3388 };
3389 
3390 /**
3391  * @var ice_iflib_subif_driver
3392  * @brief driver structure for the iflib stack
3393  *
3394  * driver_t definition used to setup the iflib device methods.
3395  */
3396 static driver_t ice_iflib_subif_driver = {
3397 	.name = "ice_subif",
3398 	.methods = ice_iflib_subif_methods,
3399 	.size = sizeof(struct ice_mirr_if),
3400 };
3401 
3402 /**
3403  * @var ice_subif_sctx
3404  * @brief ice driver shared context
3405  *
3406  * Similar to the existing ice_sctx, this structure has these differences:
3407  * - isc_admin_intrcnt is set to 0
3408  * - Uses subif iflib driver methods
3409  * - Flagged as a VF for iflib
3410  */
3411 static struct if_shared_ctx ice_subif_sctx = {
3412 	.isc_magic = IFLIB_MAGIC,
3413 	.isc_q_align = PAGE_SIZE,
3414 
3415 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
3416 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
3417 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
3418 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
3419 
3420 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
3421 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
3422 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
3423 
3424 	.isc_nfl = 1,
3425 	.isc_ntxqs = 1,
3426 	.isc_nrxqs = 1,
3427 
3428 	.isc_admin_intrcnt = 0,
3429 	.isc_vendor_info = ice_vendor_info_array,
3430 	.isc_driver_version = __DECONST(char *, ice_driver_version),
3431 	.isc_driver = &ice_iflib_subif_driver,
3432 
3433 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
3434 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX |
3435 		IFLIB_IS_VF,
3436 
3437 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
3438 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
3439 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
3440 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
3441 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
3442 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
3443 };
3444 
3445 static void *
3446 ice_subif_register(device_t dev __unused)
3447 {
3448 	return (&ice_subif_sctx);
3449 }
3450 
3451 static void
3452 ice_subif_setup_scctx(struct ice_mirr_if *mif)
3453 {
3454 	if_softc_ctx_t scctx = mif->subscctx;
3455 
3456 	scctx->isc_txrx = &ice_subif_txrx;
3457 
3458 	scctx->isc_capenable = ICE_FULL_CAPS;
3459 	scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
3460 
3461 	scctx->isc_ntxqsets = 4;
3462 	scctx->isc_nrxqsets = 4;
3463 	scctx->isc_vectors = scctx->isc_nrxqsets;
3464 
3465 	scctx->isc_ntxqsets_max = 256;
3466 	scctx->isc_nrxqsets_max = 256;
3467 
3468 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
3469 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
3470 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
3471 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
3472 
3473 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
3474 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
3475 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
3476 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
3477 }
3478 
3479 static int
3480 ice_subif_if_attach_pre(if_ctx_t ctx)
3481 {
3482 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3483 	device_t dev = iflib_get_dev(ctx);
3484 
3485 	mif->subctx = ctx;
3486 	mif->subdev = dev;
3487 	mif->subscctx = iflib_get_softc_ctx(ctx);
3488 
3489 	/* Setup the iflib softc context structure */
3490 	ice_subif_setup_scctx(mif);
3491 
3492 	return (0);
3493 }
3494 
3495 static int
3496 ice_subif_if_attach_post(if_ctx_t ctx __unused)
3497 {
3498 	return (0);
3499 }
3500 
3501 /**
3502  * ice_destroy_mirror_interface - destroy mirror interface
3503  * @sc: driver private data
3504  *
3505  * Destroys all resources associated with the mirroring interface.
3506  * Will not exit early on failure.
3507  *
3508  * @pre: Mirror interface already exists and is initialized.
3509  */
3510 void
3511 ice_destroy_mirror_interface(struct ice_softc *sc)
3512 {
3513 	struct ice_mirr_if *mif = sc->mirr_if;
3514 	struct ice_vsi *vsi = mif->vsi;
3515 	bool is_locked = false;
3516 	int ret;
3517 
3518 	is_locked = sx_xlocked(sc->iflib_ctx_lock);
3519 	if (is_locked)
3520 		IFLIB_CTX_UNLOCK(sc);
3521 
3522 	if (mif->ifp) {
3523 		ret = iflib_device_deregister(mif->subctx);
3524 		if (ret) {
3525 			device_printf(sc->dev,
3526 			    "iflib_device_deregister for mirror interface failed: %d\n",
3527 			    ret);
3528 		}
3529 	}
3530 
3531 	bus_topo_lock();
3532 	ret = device_delete_child(sc->dev, mif->subdev);
3533 	bus_topo_unlock();
3534 	if (ret) {
3535 		device_printf(sc->dev,
3536 		    "device_delete_child for mirror interface failed: %d\n",
3537 		    ret);
3538 	}
3539 
3540 	if (is_locked)
3541 		IFLIB_CTX_LOCK(sc);
3542 
3543 	if (mif->if_imap) {
3544 		free(mif->if_imap, M_ICE);
3545 		mif->if_imap = NULL;
3546 	}
3547 	if (mif->os_imap) {
3548 		free(mif->os_imap, M_ICE);
3549 		mif->os_imap = NULL;
3550 	}
3551 
3552 	/* These are freed via ice_subif_queues_free_subif
3553 	 * vsi:
3554 	 * - rx_irqvs
3555 	 * - tx_queues
3556 	 * - rx_queues
3557 	 */
3558 	ice_release_vsi(vsi);
3559 
3560 	free(mif, M_ICE);
3561 	sc->mirr_if = NULL;
3562 
3563 }
3564 
3565 /**
3566  * ice_setup_mirror_vsi - Initialize mirror VSI
3567  * @mif: driver private data for mirror interface
3568  *
3569  * Allocates a VSI for a mirror interface, and sets that VSI up for use as a
3570  * mirror for the main PF VSI.
3571  *
3572  * Returns 0 on success, or a standard error code on failure.
3573  */
3574 static int
3575 ice_setup_mirror_vsi(struct ice_mirr_if *mif)
3576 {
3577 	struct ice_softc *sc = mif->back;
3578 	device_t dev = sc->dev;
3579 	struct ice_vsi *vsi;
3580 	int ret = 0;
3581 
3582 	/* vsi is for the new mirror vsi, not the PF's main VSI */
3583 	vsi = ice_alloc_vsi(sc, ICE_VSI_VMDQ2);
3584 	if (!vsi) {
3585 		/* Already prints an error message */
3586 		return (ENOMEM);
3587 	}
3588 	mif->vsi = vsi;
3589 
3590 	/* Reserve VSI queue allocation from PF queues */
3591 	ice_alloc_vsi_qmap(vsi, ICE_DEFAULT_VF_QUEUES, ICE_DEFAULT_VF_QUEUES);
3592 	vsi->num_tx_queues = vsi->num_rx_queues = ICE_DEFAULT_VF_QUEUES;
3593 
3594 	/* Assign Tx queues from PF space */
3595 	ret = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap,
3596 	    vsi->num_tx_queues);
3597 	if (ret) {
3598 		device_printf(dev, "Unable to assign mirror VSI Tx queues: %s\n",
3599 		    ice_err_str(ret));
3600 		goto release_vsi;
3601 	}
3602 	/* Assign Rx queues from PF space */
3603 	ret = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap,
3604 	    vsi->num_rx_queues);
3605 	if (ret) {
3606 		device_printf(dev, "Unable to assign mirror VSI Rx queues: %s\n",
3607 		    ice_err_str(ret));
3608 		goto release_vsi;
3609 	}
3610 	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
3611 	vsi->max_frame_size = ICE_MAX_FRAME_SIZE;
3612 
3613 	ret = ice_initialize_vsi(vsi);
3614 	if (ret) {
3615 		device_printf(dev, "%s: Error in ice_initialize_vsi for mirror VSI: %s\n",
3616 		    __func__, ice_err_str(ret));
3617 		goto release_vsi;
3618 	}
3619 
3620 	/* Setup this VSI for receiving traffic */
3621 	ret = ice_config_rss(vsi);
3622 	if (ret) {
3623 		device_printf(dev,
3624 		    "Unable to configure RSS for mirror VSI: %s\n",
3625 		    ice_err_str(ret));
3626 		goto release_vsi;
3627 	}
3628 
3629 	/* Set HW rules for mirroring traffic */
3630 	vsi->mirror_src_vsi = sc->pf_vsi.idx;
3631 
3632 	ice_debug(&sc->hw, ICE_DBG_INIT,
3633 	    "Configuring mirroring from VSI %d to %d\n",
3634 	    vsi->mirror_src_vsi, vsi->idx);
3635 	ice_debug(&sc->hw, ICE_DBG_INIT, "(HW num: VSI %d to %d)\n",
3636 	    ice_get_hw_vsi_num(&sc->hw, vsi->mirror_src_vsi),
3637 	    ice_get_hw_vsi_num(&sc->hw, vsi->idx));
3638 
3639 	ret = ice_setup_vsi_mirroring(vsi);
3640 	if (ret) {
3641 		device_printf(dev,
3642 		    "Unable to configure mirroring for VSI: %s\n",
3643 		    ice_err_str(ret));
3644 		goto release_vsi;
3645 	}
3646 
3647 	return (0);
3648 
3649 release_vsi:
3650 	ice_release_vsi(vsi);
3651 	mif->vsi = NULL;
3652 	return (ret);
3653 }
3654 
3655 /**
3656  * ice_create_mirror_interface - Initialize mirror interface
3657  * @sc: driver private data
3658  *
3659  * Creates and sets up a mirror interface that will mirror traffic from
3660  * the main PF interface. Includes a call to iflib_device_register() in order
3661  * to setup necessary iflib structures for this new interface as well.
3662  *
3663  * If it returns successfully, a new interface will be created and will show
3664  * up in the ifconfig interface list.
3665  *
3666  * Returns 0 on success, or a standard error code on failure.
3667  */
3668 int
3669 ice_create_mirror_interface(struct ice_softc *sc)
3670 {
3671 	device_t dev = sc->dev;
3672 	struct ice_mirr_if *mif;
3673 	struct ifmedia *media;
3674 	struct sbuf *sb;
3675 	int ret = 0;
3676 
3677 	mif = (struct ice_mirr_if *)malloc(sizeof(*mif), M_ICE, M_ZERO | M_NOWAIT);
3678 	if (!mif) {
3679 		device_printf(dev, "malloc() error allocating mirror interface\n");
3680 		return (ENOMEM);
3681 	}
3682 
3683 	/* Set pointers */
3684 	sc->mirr_if = mif;
3685 	mif->back = sc;
3686 
3687 	/* Do early setup because these will be called during iflib_device_register():
3688 	 * - ice_subif_if_tx_queues_alloc
3689 	 * - ice_subif_if_rx_queues_alloc
3690 	 */
3691 	ret = ice_setup_mirror_vsi(mif);
3692 	if (ret)
3693 		goto out;
3694 
3695 	/* Determine name for new interface:
3696 	 * (base interface name)(modifier name)(modifier unit number)
3697 	 * e.g. for ice0 with a new mirror interface (modifier m)
3698 	 * of index 0, this equals "ice0m0"
3699 	 */
3700 	sb = sbuf_new_auto();
3701 	MPASS(sb != NULL);
3702 	sbuf_printf(sb, "%sm", device_get_nameunit(dev));
3703 	sbuf_finish(sb);
3704 
3705 	bus_topo_lock();
3706 	mif->subdev = device_add_child(dev, sbuf_data(sb), 0);
3707 	bus_topo_unlock();
3708 
3709 	if (!mif->subdev) {
3710 		device_printf(dev, "device_add_child failed for %s0\n", sbuf_data(sb));
3711 		sbuf_delete(sb);
3712 		free(mif, M_ICE);
3713 		sc->mirr_if = NULL;
3714 		return (ENOMEM);
3715 	}
3716 	sbuf_delete(sb);
3717 
3718 	device_set_driver(mif->subdev, &ice_subif_driver);
3719 
3720 	/* Use iflib_device_register() directly because the driver already
3721 	 * has an initialized softc to pass to iflib
3722 	 */
3723 	ret = iflib_device_register(mif->subdev, mif, &ice_subif_sctx, &mif->subctx);
3724 	if (ret)
3725 		goto out;
3726 
3727 	/* Indicate that created interface will be just for monitoring */
3728 	mif->ifp = iflib_get_ifp(mif->subctx);
3729 	if_setflagbits(mif->ifp, IFF_MONITOR, 0);
3730 
3731 	/* Use autoselect media by default */
3732 	media = iflib_get_media(mif->subctx);
3733 	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
3734 	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
3735 
3736 	device_printf(dev, "Created dev %s and ifnet %s for mirroring\n",
3737 	    device_get_nameunit(mif->subdev), if_name(mif->ifp));
3738 
3739 	ice_add_vsi_sysctls(mif->vsi);
3740 
3741 	ret = ice_wire_mirror_intrs(mif);
3742 	if (ret)
3743 		goto out;
3744 
3745 	mif->if_attached = true;
3746 	return (0);
3747 
3748 out:
3749 	ice_destroy_mirror_interface(sc);
3750 	return (ret);
3751 }
3752 
3753 /**
3754  * ice_wire_mirror_intrs
3755  * @mif: driver private subinterface structure
3756  *
3757  * Helper function that sets up driver interrupt data and calls
3758  * into iflib in order to setup interrupts in its data structures as well.
3759  *
3760  * Like ice_if_msix_intr_assign, currently requires that we get at least the same
3761  * number of vectors as we have queues, and that we always have the same number
3762  * of Tx and Rx queues. Unlike that function, this calls a special
3763  * iflib_irq_alloc_generic_subif() function for RX interrupts because the
3764  * driver needs to get MSI-X resources from the parent device.
3765  *
3766  * Tx queues use a softirq instead of using their own hardware interrupt so that
3767  * remains unchanged.
3768  *
3769  * Returns 0 on success or an error code from iflib_irq_alloc_generic_subctx()
3770  * on failure.
3771  */
3772 static int
3773 ice_wire_mirror_intrs(struct ice_mirr_if *mif)
3774 {
3775 	struct ice_softc *sc = mif->back;
3776 	struct ice_hw *hw = &sc->hw;
3777 	struct ice_vsi *vsi = mif->vsi;
3778 	device_t dev = mif->subdev;
3779 	int err, i, rid;
3780 
3781 	if_ctx_t ctx = mif->subctx;
3782 
3783 	ice_debug(hw, ICE_DBG_INIT, "%s: Last rid: %d\n", __func__, sc->last_rid);
3784 
3785 	rid = sc->last_rid + 1;
3786 	for (i = 0; i < vsi->num_rx_queues; i++, rid++) {
3787 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
3788 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
3789 		char irq_name[16];
3790 
3791 		// TODO: Change to use dynamic interface number
3792 		snprintf(irq_name, sizeof(irq_name), "m0rxq%d", i);
3793 		/* First arg is parent device (physical port's) iflib ctx */
3794 		err = iflib_irq_alloc_generic_subctx(sc->ctx, ctx,
3795 		    &mif->rx_irqvs[i].irq, rid, IFLIB_INTR_RXTX, ice_msix_que,
3796 		    rxq, rxq->me, irq_name);
3797 		if (err) {
3798 			device_printf(dev,
3799 			    "Failed to allocate q int %d err: %s\n",
3800 			    i, ice_err_str(err));
3801 			i--;
3802 			goto fail;
3803 		}
3804 		MPASS(rid - 1 > 0);
3805 		/* Set vector number used in interrupt enable/disable functions */
3806 		mif->rx_irqvs[i].me = rid - 1;
3807 		rxq->irqv = &mif->rx_irqvs[i];
3808 
3809 		bzero(irq_name, sizeof(irq_name));
3810 		snprintf(irq_name, sizeof(irq_name), "m0txq%d", i);
3811 		iflib_softirq_alloc_generic(ctx, &mif->rx_irqvs[i].irq,
3812 		    IFLIB_INTR_TX, txq, txq->me, irq_name);
3813 		txq->irqv = &mif->rx_irqvs[i];
3814 	}
3815 
3816 	sc->last_rid = rid - 1;
3817 
3818 	ice_debug(hw, ICE_DBG_INIT, "%s: New last rid: %d\n", __func__,
3819 	    sc->last_rid);
3820 
3821 	return (0);
3822 
3823 fail:
3824 	for (; i >= 0; i--)
3825 		iflib_irq_free(ctx, &mif->rx_irqvs[i].irq);
3826 	return (err);
3827 }
3828 
3829 /**
3830  * ice_subif_rebuild - Rebuild subinterface post reset
3831  * @sc: The device private softc
3832  *
3833  * Restore subinterface state after a reset occurred.
3834  * Restart the VSI and enable the mirroring.
3835  */
3836 static int
3837 ice_subif_rebuild(struct ice_softc *sc)
3838 {
3839 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(sc->ctx);
3840 	struct ice_vsi *vsi = sc->mirr_if->vsi;
3841 	int err;
3842 
3843 	err = ice_subif_rebuild_vsi_qmap(sc);
3844 	if (err) {
3845 		device_printf(sc->dev, "Unable to re-assign mirror VSI queues, err %s\n",
3846 		      ice_err_str(err));
3847 		return (err);
3848 	}
3849 
3850 	err = ice_initialize_vsi(vsi);
3851 	if (err) {
3852 		device_printf(sc->dev, "Unable to re-initialize mirror VSI, err %s\n",
3853 		      ice_err_str(err));
3854 		goto err_release_queue_allocations_subif;
3855 	}
3856 
3857 	err = ice_config_rss(vsi);
3858 	if (err) {
3859 		device_printf(sc->dev,
3860 		      "Unable to reconfigure RSS for the mirror VSI, err %s\n",
3861 		      ice_err_str(err));
3862 		goto err_deinit_subif_vsi;
3863 	}
3864 
3865 	vsi->mirror_src_vsi = sc->pf_vsi.idx;
3866 
3867 	err = ice_setup_vsi_mirroring(vsi);
3868 	if (err) {
3869 		device_printf(sc->dev,
3870 		      "Unable to configure mirroring for VSI: %s\n",
3871 		      ice_err_str(err));
3872 		goto err_deinit_subif_vsi;
3873 	}
3874 
3875 	ice_set_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT);
3876 
3877 	return (0);
3878 
3879 err_deinit_subif_vsi:
3880 	ice_deinit_vsi(vsi);
3881 err_release_queue_allocations_subif:
3882 	ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap,
3883 	    sc->mirr_if->num_irq_vectors);
3884 	ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap,
3885 	    sc->mirr_if->num_irq_vectors);
3886 
3887 	return (err);
3888 }
3889 
3890 /**
3891  * ice_subif_rebuild_vsi_qmap - Rebuild the mirror VSI queue mapping
3892  * @sc: the device softc pointer
3893  *
3894  * Loops over the Tx and Rx queues for the mirror VSI and reassigns the queue
3895  * mapping after a reset occurred.
3896  */
3897 static int
3898 ice_subif_rebuild_vsi_qmap(struct ice_softc *sc)
3899 {
3900 	struct ice_vsi *vsi = sc->mirr_if->vsi;
3901 	struct ice_tx_queue *txq;
3902 	struct ice_rx_queue *rxq;
3903 	int err, i;
3904 
3905 	err = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap, sc->mirr_if->num_irq_vectors);
3906 	if (err) {
3907 		device_printf(sc->dev, "Unable to assign mirror VSI Tx queues: %s\n",
3908 		      ice_err_str(err));
3909 		return (err);
3910 	}
3911 
3912 	err = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap, sc->mirr_if->num_irq_vectors);
3913 	if (err) {
3914 		device_printf(sc->dev, "Unable to assign mirror VSI Rx queues: %s\n",
3915 		      ice_err_str(err));
3916 		goto err_release_tx_queues;
3917 	}
3918 
3919 	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
3920 
3921 	/* Re-assign Tx queue tail pointers */
3922 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
3923 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
3924 
3925 	/* Re-assign Rx queue tail pointers */
3926 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
3927 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
3928 
3929 	return (0);
3930 
3931 err_release_tx_queues:
3932 	ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues);
3933 
3934 	return (err);
3935 }
3936 
3937 /**
3938  * ice_subif_if_tx_queues_alloc - Allocate Tx queue memory for subinterfaces
3939  * @ctx: iflib context structure
3940  * @vaddrs: virtual addresses for the queue memory
3941  * @paddrs: physical addresses for the queue memory
3942  * @ntxqs: the number of Tx queues per set (should always be 1)
3943  * @ntxqsets: the number of Tx queue sets to allocate
3944  *
3945  * See ice_if_tx_queues_alloc() description. Similar to that function, but
3946  * for subinterfaces instead.
3947  */
3948 static int
3949 ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
3950 			     int __invariant_only ntxqs, int ntxqsets)
3951 {
3952 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3953 	struct ice_tx_queue *txq;
3954 	device_t dev = mif->subdev;
3955 	struct ice_vsi *vsi;
3956 	int err, i, j;
3957 
3958 	MPASS(mif != NULL);
3959 	MPASS(ntxqs == 1);
3960 	MPASS(mif->subscctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
3961 
3962 	vsi = mif->vsi;
3963 
3964 	MPASS(vsi->num_tx_queues == ntxqsets);
3965 
3966 	/* Allocate queue structure memory */
3967 	if (!(vsi->tx_queues =
3968 	      (struct ice_tx_queue *)malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
3969 		device_printf(dev, "%s: Unable to allocate Tx queue memory for subfunction\n",
3970 		    __func__);
3971 		return (ENOMEM);
3972 	}
3973 
3974 	/* Allocate report status arrays */
3975 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3976 		if (!(txq->tx_rsq =
3977 		      (uint16_t *)malloc(sizeof(uint16_t) * mif->subscctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
3978 			device_printf(dev,
3979 			    "%s: Unable to allocate tx_rsq memory for subfunction\n", __func__);
3980 			err = ENOMEM;
3981 			goto free_tx_queues;
3982 		}
3983 		/* Initialize report status array */
3984 		for (j = 0; j < mif->subscctx->isc_ntxd[0]; j++)
3985 			txq->tx_rsq[j] = QIDX_INVALID;
3986 	}
3987 
3988 	/* Add Tx queue sysctls context */
3989 	ice_vsi_add_txqs_ctx(vsi);
3990 
3991 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3992 		/* q_handle == me when only one TC */
3993 		txq->me = txq->q_handle = i;
3994 		txq->vsi = vsi;
3995 
3996 		/* store the queue size for easier access */
3997 		txq->desc_count = mif->subscctx->isc_ntxd[0];
3998 
3999 		/* get the virtual and physical address of the hardware queues */
4000 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
4001 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
4002 		txq->tx_paddr = paddrs[i];
4003 
4004 		ice_add_txq_sysctls(txq);
4005 	}
4006 
4007 	return (0);
4008 
4009 free_tx_queues:
4010 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
4011 		if (txq->tx_rsq != NULL) {
4012 			free(txq->tx_rsq, M_ICE);
4013 			txq->tx_rsq = NULL;
4014 		}
4015 	}
4016 	free(vsi->tx_queues, M_ICE);
4017 	vsi->tx_queues = NULL;
4018 	return (err);
4019 }
4020 
4021 /**
4022  * ice_subif_if_rx_queues_alloc - Allocate Rx queue memory for subinterfaces
4023  * @ctx: iflib context structure
4024  * @vaddrs: virtual addresses for the queue memory
4025  * @paddrs: physical addresses for the queue memory
4026  * @nrxqs: number of Rx queues per set (should always be 1)
4027  * @nrxqsets: number of Rx queue sets to allocate
4028  *
4029  * See ice_if_rx_queues_alloc() for general summary; this is similar to that
4030  * but implemented for subinterfaces.
4031  */
4032 static int
4033 ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
4034     int __invariant_only nrxqs, int nrxqsets)
4035 {
4036 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4037 	struct ice_rx_queue *rxq;
4038 	device_t dev = mif->subdev;
4039 	struct ice_vsi *vsi;
4040 	int i;
4041 
4042 	MPASS(mif != NULL);
4043 	MPASS(nrxqs == 1);
4044 	MPASS(mif->subscctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
4045 
4046 	vsi = mif->vsi;
4047 
4048 	MPASS(vsi->num_rx_queues == nrxqsets);
4049 
4050 	/* Allocate queue structure memory */
4051 	if (!(vsi->rx_queues =
4052 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
4053 		device_printf(dev, "%s: Unable to allocate Rx queue memory for subfunction\n",
4054 		    __func__);
4055 		return (ENOMEM);
4056 	}
4057 
4058 	/* Add Rx queue sysctls context */
4059 	ice_vsi_add_rxqs_ctx(vsi);
4060 
4061 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
4062 		rxq->me = i;
4063 		rxq->vsi = vsi;
4064 
4065 		/* store the queue size for easier access */
4066 		rxq->desc_count = mif->subscctx->isc_nrxd[0];
4067 
4068 		/* get the virtual and physical address of the hardware queues */
4069 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
4070 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
4071 		rxq->rx_paddr = paddrs[i];
4072 
4073 		ice_add_rxq_sysctls(rxq);
4074 	}
4075 
4076 	return (0);
4077 }
4078 
4079 /**
4080  * ice_subif_if_msix_intr_assign - Assign MSI-X interrupts to new sub interface
4081  * @ctx: the iflib context structure
4082  * @msix: the number of vectors we were assigned
4083  *
4084  * Allocates and assigns driver private resources for MSI-X interrupt tracking.
4085  *
4086  * @pre OS MSI-X resources have been pre-allocated by parent interface.
4087  */
4088 static int
4089 ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix)
4090 {
4091 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4092 	struct ice_softc *sc = mif->back;
4093 	struct ice_vsi *vsi = mif->vsi;
4094 
4095 	device_t dev = mif->subdev;
4096 	int ret;
4097 
4098 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
4099 		device_printf(dev,
4100 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
4101 			      vsi->num_tx_queues, vsi->num_rx_queues);
4102 		return (EOPNOTSUPP);
4103 	}
4104 
4105 	if (msix > sc->extra_vectors) {
4106 		device_printf(dev,
4107 		     "%s: Not enough spare (%d) msix vectors for new sub-interface requested (%d)\n",
4108 		     __func__, sc->extra_vectors, msix);
4109 		return (ENOSPC);
4110 	}
4111 	device_printf(dev, "%s: Using %d vectors for sub-interface\n", __func__,
4112 	    msix);
4113 
4114 	/* Allocate space to store the IRQ vector data */
4115 	mif->num_irq_vectors = vsi->num_rx_queues;
4116 	mif->rx_irqvs = (struct ice_irq_vector *)
4117 	    malloc(sizeof(struct ice_irq_vector) * (mif->num_irq_vectors),
4118 		   M_ICE, M_NOWAIT);
4119 	if (!mif->rx_irqvs) {
4120 		device_printf(dev,
4121 			      "Unable to allocate RX irqv memory for mirror's %d vectors\n",
4122 			      mif->num_irq_vectors);
4123 		return (ENOMEM);
4124 	}
4125 
4126 	/* Assign mirror interface interrupts from PF device space */
4127 	if (!(mif->if_imap =
4128 	      (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors,
4129 	      M_ICE, M_NOWAIT))) {
4130 		device_printf(dev, "Unable to allocate mirror intfc if_imap memory\n");
4131 		ret = ENOMEM;
4132 		goto free_irqvs;
4133 	}
4134 	ret = ice_resmgr_assign_contiguous(&sc->dev_imgr, mif->if_imap, mif->num_irq_vectors);
4135 	if (ret) {
4136 		device_printf(dev, "Unable to assign mirror intfc PF device interrupt mapping: %s\n",
4137 			      ice_err_str(ret));
4138 		goto free_if_imap;
4139 	}
4140 	/* Assign mirror interface interrupts from OS interrupt allocation space */
4141 	if (!(mif->os_imap =
4142 	      (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors,
4143 	      M_ICE, M_NOWAIT))) {
4144 		device_printf(dev, "Unable to allocate mirror intfc os_imap memory\n");
4145 		ret = ENOMEM;
4146 		goto free_if_imap;
4147 	}
4148 	ret = ice_resmgr_assign_contiguous(&sc->os_imgr, mif->os_imap, mif->num_irq_vectors);
4149 	if (ret) {
4150 		device_printf(dev, "Unable to assign mirror intfc OS interrupt mapping: %s\n",
4151 			      ice_err_str(ret));
4152 		goto free_if_imap;
4153 	}
4154 
4155 	return (0);
4156 
4157 free_if_imap:
4158 	free(mif->if_imap, M_ICE);
4159 	mif->if_imap = NULL;
4160 free_irqvs:
4161 	free(mif->rx_irqvs, M_ICE);
4162 	mif->rx_irqvs = NULL;
4163 	return (ret);
4164 }
4165 
4166 /**
4167  * ice_subif_if_intr_enable - Enable device interrupts for a subinterface
4168  * @ctx: iflib context structure
4169  *
4170  * Called by iflib to request enabling all interrupts that belong to a
4171  * subinterface.
4172  */
4173 static void
4174 ice_subif_if_intr_enable(if_ctx_t ctx)
4175 {
4176 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4177 	struct ice_softc *sc = mif->back;
4178 	struct ice_vsi *vsi = mif->vsi;
4179 	struct ice_hw *hw = &sc->hw;
4180 
4181 	/* Do not enable queue interrupts in recovery mode */
4182 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4183 		return;
4184 
4185 	/* Enable all queue interrupts */
4186 	for (int i = 0; i < vsi->num_rx_queues; i++)
4187 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
4188 }
4189 
4190 /**
4191  * ice_subif_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
4192  * @ctx: iflib context structure
4193  * @rxqid: the Rx queue to enable
4194  *
4195  * Enable a specific Rx queue interrupt.
4196  *
4197  * This function is not protected by the iflib CTX lock.
4198  */
4199 static int
4200 ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
4201 {
4202 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4203 	struct ice_softc *sc = mif->back;
4204 	struct ice_vsi *vsi = mif->vsi;
4205 	struct ice_hw *hw = &sc->hw;
4206 
4207 	/* Do not enable queue interrupts in recovery mode */
4208 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4209 		return (ENOSYS);
4210 
4211 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
4212 	return (0);
4213 }
4214 
4215 /**
4216  * ice_subif_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
4217  * @ctx: iflib context structure
4218  * @txqid: the Tx queue to enable
4219  *
4220  * Enable a specific Tx queue interrupt.
4221  *
4222  * This function is not protected by the iflib CTX lock.
4223  */
4224 static int
4225 ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
4226 {
4227 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4228 	struct ice_softc *sc = mif->back;
4229 	struct ice_vsi *vsi = mif->vsi;
4230 	struct ice_hw *hw = &sc->hw;
4231 
4232 	/* Do not enable queue interrupts in recovery mode */
4233 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4234 		return (ENOSYS);
4235 
4236 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
4237 	return (0);
4238 }
4239 
4240 /**
4241  * ice_subif_if_init - Initialize the subinterface
4242  * @ctx: iflib ctx structure
4243  *
4244  * Called by iflib to bring the device up, i.e. ifconfig ice0m0 up.
4245  * Prepares the Tx and Rx engines and enables interrupts.
4246  *
4247  * @pre assumes the caller holds the iflib CTX lock
4248  */
4249 static void
4250 ice_subif_if_init(if_ctx_t ctx)
4251 {
4252 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4253 	struct ice_softc *sc = mif->back;
4254 	struct ice_vsi *vsi = mif->vsi;
4255 	device_t dev = mif->subdev;
4256 	int err;
4257 
4258 	if (ice_driver_is_detaching(sc))
4259 		return;
4260 
4261 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4262 		return;
4263 
4264 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
4265 		device_printf(dev,
4266 		    "request to start interface cannot be completed as the parent device %s failed to reset\n",
4267 		    device_get_nameunit(sc->dev));
4268 		return;
4269 	}
4270 
4271 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
4272 		device_printf(dev,
4273 		    "request to start interface cannot be completed while parent device %s is prepared for impending reset\n",
4274 		    device_get_nameunit(sc->dev));
4275 		return;
4276 	}
4277 
4278 	/* XXX: Equiv to ice_update_rx_mbuf_sz */
4279 	vsi->mbuf_sz = iflib_get_rx_mbuf_sz(ctx);
4280 
4281 	/* Initialize software Tx tracking values */
4282 	ice_init_tx_tracking(vsi);
4283 
4284 	err = ice_cfg_vsi_for_tx(vsi);
4285 	if (err) {
4286 		device_printf(dev,
4287 			      "Unable to configure subif VSI for Tx: %s\n",
4288 			      ice_err_str(err));
4289 		return;
4290 	}
4291 
4292 	err = ice_cfg_vsi_for_rx(vsi);
4293 	if (err) {
4294 		device_printf(dev,
4295 			      "Unable to configure subif VSI for Rx: %s\n",
4296 			      ice_err_str(err));
4297 		goto err_cleanup_tx;
4298 	}
4299 
4300 	err = ice_control_all_rx_queues(vsi, true);
4301 	if (err) {
4302 		device_printf(dev,
4303 			      "Unable to enable subif Rx rings for receive: %s\n",
4304 			      ice_err_str(err));
4305 		goto err_cleanup_tx;
4306 	}
4307 
4308 	ice_configure_all_rxq_interrupts(vsi);
4309 	ice_configure_rx_itr(vsi);
4310 
4311 	ice_set_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
4312 	return;
4313 
4314 err_cleanup_tx:
4315 	ice_vsi_disable_tx(vsi);
4316 }
4317 
4318 /**
4319  * ice_if_stop_subif - Stop the subinterface
4320  * @ctx: iflib context structure
4321  * @ifs: subinterface context structure
4322  *
4323  * Called by iflib to stop the subinterface and bring it down.
4324  * (e.g. ifconfig ice0m0 down)
4325  *
4326  * @pre assumes the caller holds the iflib CTX lock
4327  */
4328 static void
4329 ice_subif_if_stop(if_ctx_t ctx)
4330 {
4331 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4332 	struct ice_softc *sc = mif->back;
4333 	struct ice_vsi *vsi = mif->vsi;
4334 	device_t dev = mif->subdev;
4335 
4336 	if (!ice_testandclear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED))
4337 		return;
4338 
4339 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
4340 		device_printf(dev,
4341 		    "request to stop interface cannot be completed as the parent device %s failed to reset\n",
4342 		    device_get_nameunit(sc->dev));
4343 		return;
4344 	}
4345 
4346 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
4347 		device_printf(dev,
4348 		    "request to stop interface cannot be completed while parent device %s is prepared for impending reset\n",
4349 		    device_get_nameunit(sc->dev));
4350 		return;
4351 	}
4352 
4353 	/* Dissociate the Tx and Rx queues from the interrupts */
4354 	ice_flush_txq_interrupts(vsi);
4355 	ice_flush_rxq_interrupts(vsi);
4356 
4357 	/* Disable the Tx and Rx queues */
4358 	ice_vsi_disable_tx(vsi);
4359 	ice_control_all_rx_queues(vsi, false);
4360 }
4361 
4362 /**
4363  * ice_free_irqvs_subif - Free IRQ vector memory for subinterfaces
4364  * @mif: Mirror interface private structure
4365  *
4366  * Free IRQ vector memory allocated during ice_subif_if_msix_intr_assign.
4367  */
4368 static void
4369 ice_free_irqvs_subif(struct ice_mirr_if *mif)
4370 {
4371 	struct ice_softc *sc = mif->back;
4372 	struct ice_vsi *vsi = mif->vsi;
4373 	if_ctx_t ctx = sc->ctx;
4374 	int i;
4375 
4376 	/* If the irqvs array is NULL, then there are no vectors to free */
4377 	if (mif->rx_irqvs == NULL)
4378 		return;
4379 
4380 	/* Free the IRQ vectors -- currently subinterfaces have number
4381 	 * of vectors equal to number of RX queues
4382 	 *
4383 	 * XXX: ctx is parent device's ctx, not the subinterface ctx
4384 	 */
4385 	for (i = 0; i < vsi->num_rx_queues; i++)
4386 		iflib_irq_free(ctx, &mif->rx_irqvs[i].irq);
4387 
4388 	ice_resmgr_release_map(&sc->os_imgr, mif->os_imap,
4389 	    mif->num_irq_vectors);
4390 	ice_resmgr_release_map(&sc->dev_imgr, mif->if_imap,
4391 	    mif->num_irq_vectors);
4392 
4393 	sc->last_rid -= vsi->num_rx_queues;
4394 
4395 	/* Clear the irqv pointers */
4396 	for (i = 0; i < vsi->num_rx_queues; i++)
4397 		vsi->rx_queues[i].irqv = NULL;
4398 
4399 	for (i = 0; i < vsi->num_tx_queues; i++)
4400 		vsi->tx_queues[i].irqv = NULL;
4401 
4402 	/* Release the vector array memory */
4403 	free(mif->rx_irqvs, M_ICE);
4404 	mif->rx_irqvs = NULL;
4405 }
4406 
4407 /**
4408  * ice_subif_if_queues_free - Free queue memory for subinterfaces
4409  * @ctx: the iflib context structure
4410  *
4411  * Free queue memory allocated by ice_subif_tx_queues_alloc() and
4412  * ice_subif_if_rx_queues_alloc().
4413  */
4414 static void
4415 ice_subif_if_queues_free(if_ctx_t ctx)
4416 {
4417 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4418 	struct ice_vsi *vsi = mif->vsi;
4419 	struct ice_tx_queue *txq;
4420 	int i;
4421 
4422 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
4423 	 * pointers.
4424 	 */
4425 	ice_vsi_del_txqs_ctx(vsi);
4426 	ice_vsi_del_rxqs_ctx(vsi);
4427 
4428 	/* Release MSI-X IRQ vectors */
4429 	ice_free_irqvs_subif(mif);
4430 
4431 	if (vsi->tx_queues != NULL) {
4432 		/* free the tx_rsq arrays */
4433 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
4434 			if (txq->tx_rsq != NULL) {
4435 				free(txq->tx_rsq, M_ICE);
4436 				txq->tx_rsq = NULL;
4437 			}
4438 		}
4439 		free(vsi->tx_queues, M_ICE);
4440 		vsi->tx_queues = NULL;
4441 	}
4442 	if (vsi->rx_queues != NULL) {
4443 		free(vsi->rx_queues, M_ICE);
4444 		vsi->rx_queues = NULL;
4445 	}
4446 }
4447 
4448 /**
4449  * ice_subif_if_media_status - Report subinterface media
4450  * @ctx: iflib context structure
4451  * @ifmr: ifmedia request structure to update
4452  *
4453  * Updates the provided ifmr with something, in order to prevent a
4454  * "no media types?" message from ifconfig.
4455  *
4456  * Mirror interfaces are always up.
4457  */
4458 static void
4459 ice_subif_if_media_status(if_ctx_t ctx __unused, struct ifmediareq *ifmr)
4460 {
4461 	ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE;
4462 	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
4463 }
4464 
4465 /**
4466  * ice_subif_if_promisc_set - Set subinterface promiscuous mode
4467  * @ctx: iflib context structure
4468  * @flags: promiscuous flags to configure
4469  *
4470  * Called by iflib to configure device promiscuous mode.
4471  *
4472  * @remark This does not need to be implemented for now.
4473  */
4474 static int
4475 ice_subif_if_promisc_set(if_ctx_t ctx __unused, int flags __unused)
4476 {
4477 	return (0);
4478 }
4479 
4480